From d44fce501bf299692d578349b92c899c3f0d79cd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 24 Jun 2010 11:50:28 +0200 Subject: [PATCH] fix computation of blocking sizes for small triangular matrices --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 10 ++++++++-- Eigen/src/Core/products/TriangularMatrixMatrix.h | 8 ++------ Eigen/src/Core/products/TriangularSolverMatrix.h | 8 ++------ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index cf48ca2f4..7e42eed6e 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -101,7 +101,7 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) * - the number of scalars that fit into a packet (when vectorization is enabled). * * \sa setCpuCacheSizes */ -template<typename LhsScalar, typename RhsScalar> +template<typename LhsScalar, typename RhsScalar, int KcFactor> void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n) { // Explanations: @@ -114,7 +114,7 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd std::ptrdiff_t l1, l2; enum { - kdiv = 2 * ei_product_blocking_traits<RhsScalar>::nr + kdiv = KcFactor * 2 * ei_product_blocking_traits<RhsScalar>::nr * ei_packet_traits<RhsScalar>::size * sizeof(RhsScalar), mr = ei_product_blocking_traits<LhsScalar>::mr, mr_mask = (0xffffffff/mr)*mr @@ -127,6 +127,12 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd n = n; } +template<typename LhsScalar, typename RhsScalar> +inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n) +{ + computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n); +} + #ifdef EIGEN_HAS_FUSE_CJMADD #define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); #else diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index decf515b0..979609649 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -117,9 
+117,7 @@ struct ei_product_triangular_matrix_matrix(kc, mc, nc); - // it is better to use smaller blocks along the diagonal - kc /= 4; + computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; @@ -245,9 +243,7 @@ struct ei_product_triangular_matrix_matrix(kc, mc, nc); - // it is better to use smaller blocks along the diagonal - kc /= 4; + computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 381983459..4723d355a 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -66,9 +66,7 @@ struct ei_triangular_solve_matrix(kc, mc, nc); - // it is better to use smaller blocks along the diagonal - kc /= 4; + computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; @@ -206,9 +204,7 @@ struct ei_triangular_solve_matrix(kc, mc, nc); - // it is better to use smaller blocks along the diagonal - kc /= 4; + computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;