bug #936, patch 1.5/3: rename _FUSED_ macros to _SINGLE_INSTRUCTION_,

because this is what they are about. "Fused" means "no intermediate rounding
between the mul and the add, only one rounding at the end". Instead,
what we are concerned about here is whether a temporary register is needed,
i.e. whether the MUL and ADD are separate instructions.
Concretely, on ARM NEON, a single-instruction mul-add is always available: VMLA.
But a true fused mul-add is only available on VFPv4: VFMA.
This commit is contained in:
Benoit Jacob 2015-01-31 14:15:57 -05:00
parent 9f99f61e69
commit 340b8afb14
5 changed files with 18 additions and 18 deletions

View File

@ -23,8 +23,8 @@ namespace internal {
#endif
#ifdef EIGEN_VECTORIZE_FMA
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
#endif
#endif

View File

@ -18,12 +18,12 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
#endif
#ifndef EIGEN_HAS_FUSED_CJMADD
#define EIGEN_HAS_FUSED_CJMADD
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16

View File

@ -20,12 +20,12 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
#endif
#ifndef EIGEN_HAS_FUSED_CJMADD
#define EIGEN_HAS_FUSED_CJMADD
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
// FIXME NEON has 16 quad registers, but since the current register allocator

View File

@ -23,8 +23,8 @@ namespace internal {
#endif
#ifdef EIGEN_VECTORIZE_FMA
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
#endif
#endif

View File

@ -120,7 +120,7 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
}
#ifdef EIGEN_HAS_FUSED_CJMADD
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
#else
@ -182,7 +182,7 @@ public:
nr = 4,
// register block size along the M direction (currently, this one cannot be modified)
#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@ -248,7 +248,7 @@ public:
// let gcc allocate the register in which to store the result of the pmul
// (in the case where there is no FMA) gcc fails to figure out how to avoid
// spilling register.
#ifdef EIGEN_HAS_FUSED_MADD
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd(a,b,c);
#else
@ -290,7 +290,7 @@ public:
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = 4,
#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@ -353,7 +353,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
#ifdef EIGEN_HAS_FUSED_MADD
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a.v,b,c.v);
#else
@ -637,7 +637,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
#ifdef EIGEN_HAS_FUSED_MADD
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a,b.v,c.v);
#else