From c20e3641de5b6d56f5496fef2619a1f53f8a1835 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 25 Apr 2014 13:22:34 +0200 Subject: [PATCH] Fix for mixed products --- .../Core/products/GeneralBlockPanelKernel.h | 53 +++++++++++-------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 41c46c67a..ebf438d57 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -480,8 +480,14 @@ public: loadRhs(b,dest); } - // linking error if instantiated without being optimized out: - void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3); + EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3) + { + // FIXME not sure that's the best way to implement it! + loadRhs(b+0, b0); + loadRhs(b+1, b1); + loadRhs(b+2, b2); + loadRhs(b+3, b3); + } // Vectorized path EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1) @@ -602,9 +608,11 @@ public: dest = pset1(*b); } - // linking error if instantiated without being optimized out: -// void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3); -// + void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3) + { + pbroadcast4(b, b0, b1, b2, b3); + } + // EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1) // { // // FIXME not sure that's the best way to implement it! @@ -1137,19 +1145,16 @@ void gebp_kernel for(Index k=0; k // process remaining peeled loop for(Index k=peeled_kc; k=1*PacketSize ? peeled_mc1 : Pack2>1 ? (rows/Pack2)*Pack2 : 0; + Index i=0; + // Pack 3 packets if(Pack1>=3*PacketSize) { - for(Index i=0; i=2*PacketSize) { - for(Index i=peeled_mc3; i=1*PacketSize) { - for(Index i=peeled_mc2; i1) { - for(Index i=peeled_mc1; i