From b67c983291706bd5cc776b3202cacb6ca6508b76 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 12 Apr 2016 23:03:03 +0200 Subject: [PATCH] Enable the use of half-packet in coeff-based product. For instance, Matrix4f*Vector4f is now vectorized again when using AVX. --- Eigen/src/Core/ProductEvaluators.h | 36 +++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 3ce86e8cd5..443a58c27e 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -410,8 +410,6 @@ struct product_evaluator, ProductTag, DenseShape, typedef Product XprType; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketScalar PacketScalar; - typedef typename XprType::PacketReturnType PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) @@ -437,16 +435,20 @@ struct product_evaluator, ProductTag, DenseShape, typedef evaluator LhsEtorType; typedef evaluator RhsEtorType; - + enum { RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, - MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, - - PacketSize = packet_traits::size, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime + }; + typedef typename find_best_packet::type LhsVecPacketType; + typedef typename find_best_packet::type RhsVecPacketType; + + enum { + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost @@ -459,19 +461,23 @@ struct product_evaluator, ProductTag, DenseShape, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, - LhsAlignment = LhsEtorType::Alignment, - RhsAlignment = RhsEtorType::Alignment, - LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, + + LhsVecPacketSize = unpacket_traits::size, + RhsVecPacketSize = unpacket_traits::size, + + // + LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), + RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), SameType = is_same::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ), + && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % RhsVecPacketSize) == 0) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ), + && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % LhsVecPacketSize) == 0) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -491,10 +497,10 @@ struct product_evaluator, ProductTag, DenseShape, : 0, /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside - * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner - * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect - * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. - */ + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor)