From b4fe53f5610f20e9c4f15c676b3b26d252fd2fba Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 22 Jun 2010 16:08:35 +0200 Subject: [PATCH] * makes all product use the new API to set the blocking sizes * fix an issue preventing multithreading (now Dynamic = -1 ...) --- Eigen/src/Core/products/GeneralMatrixMatrix.h | 2 +- Eigen/src/Core/products/Parallelizer.h | 2 +- .../Core/products/SelfadjointMatrixMatrix.h | 12 ++++++++---- Eigen/src/Core/products/SelfadjointProduct.h | 8 +++++--- .../Core/products/TriangularMatrixMatrix.h | 18 +++++++++++++----- .../Core/products/TriangularSolverMatrix.h | 19 +++++++++++++++---- Eigen/src/Core/util/BlasUtil.h | 10 ++-------- 7 files changed, 45 insertions(+), 26 deletions(-) diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 9139976c3..062d75ba9 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -284,7 +284,7 @@ class GeneralProduct _ActualRhsType, Dest> GemmFunctor; - ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32)>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols()); + ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols()); } }; diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 588f78b4c..750fa7b5f 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -29,7 +29,7 @@ inline void ei_manage_multi_threading(Action action, int* v) { static int m_maxThreads = -1; - + if(action==SetAction) { ei_internal_assert(v!=0); diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 31726e66d..cc9333384 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -258,8 +258,10 @@ struct ei_product_selfadjoint_matrix Blocking; - Index kc = std::min(Blocking::Max_kc,size); // cache block size along the K direction - Index mc = std::min(Blocking::Max_mc,rows); // cache block size along the M direction + Index kc = size; // cache block size along the K direction + Index mc = rows; // cache block size along the M direction + Index nc = cols; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; @@ -339,8 +341,10 @@ struct ei_product_selfadjoint_matrix Blocking; - Index kc = std::min(Blocking::Max_kc,size); // cache block size along the K direction - Index mc = std::min(Blocking::Max_mc,rows); // cache block size along the M direction + Index kc = size; // cache block size along the K direction + Index mc = rows; // cache block size along the M direction + Index nc = cols; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index befc4ff69..8ce797cff 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -70,14 +70,16 @@ struct ei_selfadjoint_product Blocking; - Index kc = std::min(Blocking::Max_kc,depth); // cache block size along the K direction - Index mc = std::min(Blocking::Max_mc,size); // cache block size along the M direction + Index kc = depth; // cache block size along the K direction + Index mc = size; // cache block size along the M direction + Index nc = size; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size; Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr; - + // note that the actual rhs is the transpose/adjoint of mat typedef ei_conj_helper::IsComplex && !AAT, NumTraits::IsComplex && AAT> Conj; diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index a099160c2..decf515b0 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -114,8 +114,12 @@ struct ei_product_triangular_matrix_matrix(Blocking::Max_kc/4,depth); // cache block size along the K direction - Index mc = std::min(Blocking::Max_mc,rows); // cache block size along the M direction + Index kc = depth; // cache block size along the K direction + Index mc = rows; // cache block size along the M direction + Index nc = cols; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + // it is better to use smaller blocks along the diagonal + kc /= 4; Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; @@ -238,8 +242,12 @@ struct ei_product_triangular_matrix_matrix(Blocking::Max_kc/4,depth); // cache block size along the K direction - Index mc = std::min(Blocking::Max_mc,rows); // cache block size along the M direction + Index kc = depth; // cache block size along the K direction + Index mc = rows; // cache block size along the M direction + Index nc = cols; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + // it is better to use smaller blocks along the diagonal + kc /= 4; Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; @@ -273,7 +281,7 @@ struct ei_product_triangular_matrix_matrix=cols) ? 0 : actual_kc; - + Scalar* geb = blockB+ts*ts; pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, alpha, actual_kc, rs); diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 11e08c3b5..381983459 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -63,8 +63,12 @@ struct ei_triangular_solve_matrix(Blocking::Max_kc/4,size); // cache block size along the K direction - Index mc = std::min(Blocking::Max_mc,size); // cache block size along the M direction + Index kc = size; // cache block size along the K direction + Index mc = size; // cache block size along the M direction + Index nc = cols; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + // it is better to use smaller blocks along the diagonal + kc /= 4; Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; @@ -196,8 +200,15 @@ struct ei_triangular_solve_matrix(Blocking::Max_kc/4,size); // cache block size along the K direction - Index mc = std::min(Blocking::Max_mc,size); // cache block size along the M direction +// Index kc = std::min(Blocking::Max_kc/4,size); // cache block size along the K direction +// Index mc = std::min(Blocking::Max_mc,size); // cache block size along the M direction + // check that !!!! + Index kc = size; // cache block size along the K direction + Index mc = size; // cache block size along the M direction + Index nc = rows; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + // it is better to use smaller blocks along the diagonal + kc /= 4; Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size; diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index a5fa1532d..c86d70fb2 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -123,7 +123,7 @@ class ei_const_blas_data_mapper Index m_stride; }; -// Defines various constant controlling level 3 blocking +// Defines various constant controlling register blocking for matrix-matrix algorithms. template struct ei_product_blocking_traits { @@ -136,13 +136,7 @@ struct ei_product_blocking_traits nr = NumberOfRegisters/4, // register block size along the M direction (currently, this one cannot be modified) - mr = 2 * PacketSize, - - // max cache block size along the K direction - Max_kc = 4 * ei_meta_sqrt::ret, - - // max cache block size along the M direction - Max_mc = 2*Max_kc + mr = 2 * PacketSize }; };