From 6136f4fdd432dfff1c374348da8f76b9c93ac8ab Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 24 May 2016 10:00:32 -0400 Subject: [PATCH] Remove the rotating kernel. It was only useful on some ARM CPUs (Qualcomm Krait) that are not as ubiquitous today as they were when I introduced it. --- .../Core/products/GeneralBlockPanelKernel.h | 100 +----------------- 1 file changed, 5 insertions(+), 95 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index e43529cc73..253c03462e 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -860,80 +860,6 @@ protected: conj_helper cj; }; -// helper for the rotating kernel below -template -struct PossiblyRotatingKernelHelper -{ - // default implementation, not rotating - - typedef typename GebpKernel::Traits Traits; - typedef typename Traits::RhsScalar RhsScalar; - typedef typename Traits::RhsPacket RhsPacket; - typedef typename Traits::AccPacket AccPacket; - - const Traits& traits; - PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {} - - - template - void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const - { - traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to); - } - - void unrotateResult(AccPacket&, - AccPacket&, - AccPacket&, - AccPacket&) - { - } -}; - -// rotating implementation -template -struct PossiblyRotatingKernelHelper -{ - typedef typename GebpKernel::Traits Traits; - typedef typename Traits::RhsScalar RhsScalar; - typedef typename Traits::RhsPacket RhsPacket; - typedef typename Traits::AccPacket AccPacket; - - const Traits& traits; - PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {} - - template - void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const - { - if (Index == 0) { - to = pload(from + 4*K*Traits::RhsProgress); - } else { - EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); - to = protate<1>(to); - } - } - - void unrotateResult(AccPacket& res0, - AccPacket& res1, - AccPacket& res2, - AccPacket& res3) - { - PacketBlock resblock; - resblock.packet[0] = res0; - resblock.packet[1] = res1; - resblock.packet[2] = res2; - resblock.packet[3] = res3; - ptranspose(resblock); - resblock.packet[3] = protate<1>(resblock.packet[3]); - resblock.packet[2] = protate<2>(resblock.packet[2]); - resblock.packet[1] = protate<3>(resblock.packet[1]); - ptranspose(resblock); - res0 = resblock.packet[0]; - res1 = resblock.packet[1]; - res2 = resblock.packet[2]; - res3 = resblock.packet[3]; - } -}; - /* optimized GEneral packed Block * packed Panel product kernel * * Mixing type logic: C += A * B @@ -967,16 +893,6 @@ struct gebp_kernel ResPacketSize = Traits::ResPacketSize }; - - static const bool UseRotatingKernel = - EIGEN_ARCH_ARM && - internal::is_same::value && - internal::is_same::value && - internal::is_same::value && - Traits::LhsPacketSize == 4 && - Traits::RhsPacketSize == 4 && - Traits::ResPacketSize == 4; - EIGEN_DONT_INLINE void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, @@ -1009,9 +925,7 @@ void gebp_kernel=3*Traits::LhsProgress) - { - PossiblyRotatingKernelHelper possiblyRotatingKernelHelper(traits); - + { // Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth) // and on each largest micro vertical panel of the rhs (depth * nr). // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1. @@ -1074,19 +988,19 @@ void gebp_kernel(B_0, blB); \ + traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C0, T0); \ traits.madd(A1, B_0, C4, T0); \ traits.madd(A2, B_0, C8, B_0); \ - possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ + traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C1, T0); \ traits.madd(A1, B_0, C5, T0); \ traits.madd(A2, B_0, C9, B_0); \ - possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ + traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C2, T0); \ traits.madd(A1, B_0, C6, T0); \ traits.madd(A2, B_0, C10, B_0); \ - possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ + traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \ traits.madd(A0, B_0, C3 , T0); \ traits.madd(A1, B_0, C7, T0); \ traits.madd(A2, B_0, C11, B_0); \ @@ -1120,10 +1034,6 @@ void gebp_kernel(alpha);