From 1eec38dc36aad1dc5574eb8c384ee9b7422c5be8 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 17 Jan 2009 09:48:58 +0000 Subject: [PATCH] Rewrite the vectorized meta unroller of sum to reduce instruction dependency => significant speed up --- Eigen/src/Core/Sum.h | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h index e30392534..6d7e9959f 100644 --- a/Eigen/src/Core/Sum.h +++ b/Eigen/src/Core/Sum.h @@ -100,18 +100,13 @@ struct ei_sum_novec_unroller }; /*** vectorization ***/ - -template::size)> + +template struct ei_sum_vec_unroller { enum { - row = int(Derived::Flags)&RowMajorBit - ? Index / int(Derived::ColsAtCompileTime) - : Index % Derived::RowsAtCompileTime, - col = int(Derived::Flags)&RowMajorBit - ? Index % int(Derived::ColsAtCompileTime) - : Index / Derived::RowsAtCompileTime + PacketSize = ei_packet_traits::size, + HalfLength = Length/2 }; typedef typename Derived::Scalar Scalar; @@ -120,22 +115,22 @@ struct ei_sum_vec_unroller inline static PacketScalar run(const Derived &mat) { return ei_padd( - mat.template packet(row, col), - ei_sum_vec_unroller::size, Stop>::run(mat) - ); + ei_sum_vec_unroller::run(mat), + ei_sum_vec_unroller::run(mat) ); } }; -template -struct ei_sum_vec_unroller +template +struct ei_sum_vec_unroller { enum { + index = Start * ei_packet_traits::size, row = int(Derived::Flags)&RowMajorBit - ? Index / int(Derived::ColsAtCompileTime) - : Index % Derived::RowsAtCompileTime, + ? index / int(Derived::ColsAtCompileTime) + : index % Derived::RowsAtCompileTime, col = int(Derived::Flags)&RowMajorBit - ? Index % int(Derived::ColsAtCompileTime) - : Index / Derived::RowsAtCompileTime, + ? index % int(Derived::ColsAtCompileTime) + : index / Derived::RowsAtCompileTime, alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned }; @@ -238,7 +233,7 @@ struct ei_sum_impl }; static Scalar run(const Derived& mat) { - Scalar res = ei_predux(ei_sum_vec_unroller::run(mat)); + Scalar res = ei_predux(ei_sum_vec_unroller::run(mat)); if (VectorizationSize != Size) res += ei_sum_novec_unroller::run(mat); return res;