From b1bd53aa6bec39c53de475c90987eece86c206d2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 1 May 2016 23:25:06 +0200 Subject: [PATCH] Fix performance regression: with AVX, unaligned stores were emitted instead of aligned ones for fixed-size assignment. --- Eigen/src/Core/AssignEvaluator.h | 15 +++++++++++---- Eigen/src/Core/CoreEvaluators.h | 4 ++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 9d4b315a0..b1193e421 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -256,12 +256,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, - JointAlignment = Kernel::AssignmentTraits::JointAlignment + JointAlignment = Kernel::AssignmentTraits::JointAlignment, + DefaultAlignment = unpacket_traits::alignment }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - kernel.template assignPacketByOuterInner(outer, inner); + kernel.template assignPacketByOuterInner(outer, inner); enum { NextIndex = Index + unpacket_traits::size }; copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); } @@ -277,9 +278,12 @@ template struct copy_using_evaluator_innervec_InnerUnrolling { typedef typename Kernel::PacketType PacketType; + enum { + DefaultAlignment = unpacket_traits::alignment + }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) { - kernel.template assignPacketByOuterInner(outer, Index_); + kernel.template assignPacketByOuterInner(outer, Index_); enum { NextIndex = Index_ + unpacket_traits::size }; copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } @@ -433,6 +437,9 @@ template struct dense_assignment_loop { typedef typename Kernel::PacketType PacketType; + enum { + DefaultAlignment = unpacket_traits::alignment + }; EIGEN_DEVICE_FUNC static 
EIGEN_STRONG_INLINE void run(Kernel &kernel) { const Index innerSize = kernel.innerSize(); @@ -440,7 +447,7 @@ struct dense_assignment_loop const Index packetSize = unpacket_traits::size; for(Index outer = 0; outer < outerSize; ++outer) for(Index inner = 0; inner < innerSize; inner+=packetSize) - kernel.template assignPacketByOuterInner(outer, inner); + kernel.template assignPacketByOuterInner(outer, inner); } }; diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 388805f0d..932178f53 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -850,14 +850,14 @@ struct unary_evaluator, IndexBa template EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) - { + { return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); } template EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) - { + { return writePacket(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0, x);