mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
bug #936, patch 1.5/3: rename _FUSED_ macros to _SINGLE_INSTRUCTION_,
because that is what they are actually about. "Fused" means "no intermediate rounding between the mul and the add, only one rounding at the end". What we are concerned with here, instead, is whether a temporary register is needed, i.e. whether the MUL and the ADD are separate instructions. Concretely, on ARM NEON, a single-instruction mul-add is always available (VMLA), but a true fused mul-add is only available with VFPv4 (VFMA).
This commit is contained in:
parent
9f99f61e69
commit
340b8afb14
@ -23,8 +23,8 @@ namespace internal {
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -18,12 +18,12 @@ namespace internal {
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSED_CJMADD
|
||||
#define EIGEN_HAS_FUSED_CJMADD
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
||||
|
@ -20,12 +20,12 @@ namespace internal {
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSED_CJMADD
|
||||
#define EIGEN_HAS_FUSED_CJMADD
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// FIXME NEON has 16 quad registers, but since the current register allocator
|
||||
|
@ -23,8 +23,8 @@ namespace internal {
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -120,7 +120,7 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_HAS_FUSED_CJMADD
|
||||
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
||||
#else
|
||||
|
||||
@ -182,7 +182,7 @@ public:
|
||||
nr = 4,
|
||||
|
||||
// register block size along the M direction (currently, this one cannot be modified)
|
||||
#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
|
||||
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
|
||||
// we assume 16 registers
|
||||
mr = 3*LhsPacketSize,
|
||||
#else
|
||||
@ -248,7 +248,7 @@ public:
|
||||
// let gcc allocate the register in which to store the result of the pmul
|
||||
// (in the case where there is no FMA) gcc fails to figure out how to avoid
|
||||
// spilling register.
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
EIGEN_UNUSED_VARIABLE(tmp);
|
||||
c = pmadd(a,b,c);
|
||||
#else
|
||||
@ -290,7 +290,7 @@ public:
|
||||
|
||||
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
|
||||
nr = 4,
|
||||
#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
|
||||
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
|
||||
// we assume 16 registers
|
||||
mr = 3*LhsPacketSize,
|
||||
#else
|
||||
@ -353,7 +353,7 @@ public:
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||
{
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
EIGEN_UNUSED_VARIABLE(tmp);
|
||||
c.v = pmadd(a.v,b,c.v);
|
||||
#else
|
||||
@ -637,7 +637,7 @@ public:
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||
{
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
EIGEN_UNUSED_VARIABLE(tmp);
|
||||
c.v = pmadd(a,b.v,c.v);
|
||||
#else
|
||||
|
Loading…
Reference in New Issue
Block a user