mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-12 14:25:16 +08:00
Small cleanup: Remove the macros EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD and CJMADD. They were effectively unused except on x86, where removing them results in identically performing code.
This commit is contained in:
parent
52a5f98212
commit
bffd267d17
@ -22,10 +22,6 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
|
@ -28,10 +28,6 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#endif
|
||||
|
@ -24,10 +24,6 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
|
@ -22,10 +22,6 @@ namespace internal
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
|
||||
template <typename Scalar, int SVEVectorLength>
|
||||
|
@ -22,10 +22,6 @@ namespace internal {
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#endif
|
||||
|
@ -349,36 +349,6 @@ inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
||||
#else
|
||||
|
||||
// FIXME (a bit overkill maybe ?)
|
||||
|
||||
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
|
||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
||||
{
|
||||
c = cj.pmadd(a,b,c);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
|
||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
||||
{
|
||||
t = b; t = cj.pmul(a,t); c = padd(c,t);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename CJ, typename A, typename B, typename C, typename T>
|
||||
EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
|
||||
{
|
||||
gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
|
||||
}
|
||||
|
||||
#define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
|
||||
// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
|
||||
#endif
|
||||
|
||||
template <typename RhsPacket, typename RhsPacketx4, int registers_taken>
|
||||
struct RhsPanelHelper {
|
||||
private:
|
||||
@ -2060,14 +2030,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
|
||||
B_0 = blB[0];
|
||||
B_1 = blB[1];
|
||||
CJMADD(cj,A0,B_0,C0, B_0);
|
||||
CJMADD(cj,A0,B_1,C1, B_1);
|
||||
|
||||
C0 = cj.pmadd(A0,B_0,C0);
|
||||
C1 = cj.pmadd(A0,B_1,C1);
|
||||
|
||||
B_0 = blB[2];
|
||||
B_1 = blB[3];
|
||||
CJMADD(cj,A0,B_0,C2, B_0);
|
||||
CJMADD(cj,A0,B_1,C3, B_1);
|
||||
|
||||
C2 = cj.pmadd(A0,B_0,C2);
|
||||
C3 = cj.pmadd(A0,B_1,C3);
|
||||
|
||||
blB += 4;
|
||||
}
|
||||
res(i, j2 + 0) += alpha * C0;
|
||||
@ -2092,7 +2062,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
{
|
||||
LhsScalar A0 = blA[k];
|
||||
RhsScalar B_0 = blB[k];
|
||||
CJMADD(cj, A0, B_0, C0, B_0);
|
||||
C0 = cj.pmadd(A0, B_0, C0);
|
||||
}
|
||||
res(i, j2) += alpha * C0;
|
||||
}
|
||||
@ -2101,8 +2071,6 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
}
|
||||
|
||||
|
||||
#undef CJMADD
|
||||
|
||||
// pack a block of the lhs
|
||||
// The traversal is as follows (mr==4):
|
||||
// 0 4 8 12 ...
|
||||
|
Loading…
Reference in New Issue
Block a user