Small cleanup: Remove the macros EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD and CJMADD. They were effectively unused except on x86, where this change results in identically performing code.

This commit is contained in:
Rasmus Munk Larsen 2021-06-24 18:52:17 -07:00
parent 52a5f98212
commit bffd267d17
6 changed files with 7 additions and 59 deletions

View File

@ -22,10 +22,6 @@ namespace internal {
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32

View File

@ -28,10 +28,6 @@ namespace internal {
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif

View File

@ -24,10 +24,6 @@ namespace internal {
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#if EIGEN_ARCH_ARM64
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32

View File

@ -22,10 +22,6 @@ namespace internal
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
template <typename Scalar, int SVEVectorLength>

View File

@ -22,10 +22,6 @@ namespace internal {
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#endif
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif

View File

@ -349,36 +349,6 @@ inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_
computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
}
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
#else
// FIXME (a bit overkill maybe ?)
// Generic multiply-accumulate step: replaces `acc` with the result of
// conj.pmadd(lhs, rhs, acc). The fifth (scratch) argument is unnamed here and
// is never read or written by this version.
template<typename CJ, typename A, typename B, typename C, typename T>
struct gebp_madd_selector
{
  EIGEN_ALWAYS_INLINE static void run(const CJ& conj, A& lhs, B& rhs, C& acc, T& /*scratch*/)
  {
    acc = conj.pmadd(lhs, rhs, acc);
  }
};
// Partial specialization selected when both operands, the accumulator and the
// scratch value all have the same type T. Instead of a single pmadd call, the
// update is performed as three separate steps through the caller-provided
// scratch variable: copy, multiply, then add.
template<typename CJ, typename T>
struct gebp_madd_selector<CJ,T,T,T,T>
{
  EIGEN_ALWAYS_INLINE static void run(const CJ& conj, T& lhs, T& rhs, T& acc, T& scratch)
  {
    // Stage the right-hand operand in the scratch slot.
    scratch = rhs;
    // Overwrite the scratch slot with the product computed by the conj helper.
    scratch = conj.pmul(lhs, scratch);
    // Fold the product into the accumulator.
    acc = padd(acc, scratch);
  }
};
// Entry point for the multiply-accumulate step: picks the matching
// gebp_madd_selector instantiation from the argument types and forwards all
// five arguments to its static run() unchanged.
template<typename CJ, typename A, typename B, typename C, typename T>
EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
{
  typedef gebp_madd_selector<CJ,A,B,C,T> Selector;
  Selector::run(cj, a, b, c, t);
}
#define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
#endif
template <typename RhsPacket, typename RhsPacketx4, int registers_taken>
struct RhsPanelHelper {
private:
@ -2060,14 +2030,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
B_0 = blB[0];
B_1 = blB[1];
CJMADD(cj,A0,B_0,C0, B_0);
CJMADD(cj,A0,B_1,C1, B_1);
C0 = cj.pmadd(A0,B_0,C0);
C1 = cj.pmadd(A0,B_1,C1);
B_0 = blB[2];
B_1 = blB[3];
CJMADD(cj,A0,B_0,C2, B_0);
CJMADD(cj,A0,B_1,C3, B_1);
C2 = cj.pmadd(A0,B_0,C2);
C3 = cj.pmadd(A0,B_1,C3);
blB += 4;
}
res(i, j2 + 0) += alpha * C0;
@ -2092,7 +2062,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
{
LhsScalar A0 = blA[k];
RhsScalar B_0 = blB[k];
CJMADD(cj, A0, B_0, C0, B_0);
C0 = cj.pmadd(A0, B_0, C0);
}
res(i, j2) += alpha * C0;
}
@ -2101,8 +2071,6 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
}
#undef CJMADD
// pack a block of the lhs
// The traversal is as follow (mr==4):
// 0 4 8 12 ...