mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-18 14:34:17 +08:00
* makes all products use the new API to set the blocking sizes
* fix an issue preventing multithreading (Dynamic is now -1, so the compile-time row-count test must check for Dynamic explicitly)
This commit is contained in:
parent
fd9a9fa0ae
commit
b4fe53f561
@ -284,7 +284,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
|||||||
_ActualRhsType,
|
_ActualRhsType,
|
||||||
Dest> GemmFunctor;
|
Dest> GemmFunctor;
|
||||||
|
|
||||||
ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32)>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols());
|
ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
inline void ei_manage_multi_threading(Action action, int* v)
|
inline void ei_manage_multi_threading(Action action, int* v)
|
||||||
{
|
{
|
||||||
static int m_maxThreads = -1;
|
static int m_maxThreads = -1;
|
||||||
|
|
||||||
if(action==SetAction)
|
if(action==SetAction)
|
||||||
{
|
{
|
||||||
ei_internal_assert(v!=0);
|
ei_internal_assert(v!=0);
|
||||||
|
@ -258,8 +258,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
|
|||||||
|
|
||||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||||
|
|
||||||
Index kc = std::min<Index>(Blocking::Max_kc,size); // cache block size along the K direction
|
Index kc = size; // cache block size along the K direction
|
||||||
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
|
Index mc = rows; // cache block size along the M direction
|
||||||
|
Index nc = cols; // cache block size along the N direction
|
||||||
|
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
@ -339,8 +341,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
|
|||||||
|
|
||||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||||
|
|
||||||
Index kc = std::min<Index>(Blocking::Max_kc,size); // cache block size along the K direction
|
Index kc = size; // cache block size along the K direction
|
||||||
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
|
Index mc = rows; // cache block size along the M direction
|
||||||
|
Index nc = cols; // cache block size along the N direction
|
||||||
|
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
|
@ -70,14 +70,16 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
|
|||||||
|
|
||||||
typedef ei_product_blocking_traits<Scalar> Blocking;
|
typedef ei_product_blocking_traits<Scalar> Blocking;
|
||||||
|
|
||||||
Index kc = std::min<Index>(Blocking::Max_kc,depth); // cache block size along the K direction
|
Index kc = depth; // cache block size along the K direction
|
||||||
Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
|
Index mc = size; // cache block size along the M direction
|
||||||
|
Index nc = size; // cache block size along the N direction
|
||||||
|
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
||||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||||
|
|
||||||
// note that the actual rhs is the transpose/adjoint of mat
|
// note that the actual rhs is the transpose/adjoint of mat
|
||||||
typedef ei_conj_helper<NumTraits<Scalar>::IsComplex && !AAT, NumTraits<Scalar>::IsComplex && AAT> Conj;
|
typedef ei_conj_helper<NumTraits<Scalar>::IsComplex && !AAT, NumTraits<Scalar>::IsComplex && AAT> Conj;
|
||||||
|
|
||||||
|
@ -114,8 +114,12 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|||||||
IsLower = (Mode&Lower) == Lower
|
IsLower = (Mode&Lower) == Lower
|
||||||
};
|
};
|
||||||
|
|
||||||
Index kc = std::min<Index>(Blocking::Max_kc/4,depth); // cache block size along the K direction
|
Index kc = depth; // cache block size along the K direction
|
||||||
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
|
Index mc = rows; // cache block size along the M direction
|
||||||
|
Index nc = cols; // cache block size along the N direction
|
||||||
|
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||||
|
// it is better to use smaller blocks along the diagonal
|
||||||
|
kc /= 4;
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
@ -238,8 +242,12 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|||||||
IsLower = (Mode&Lower) == Lower
|
IsLower = (Mode&Lower) == Lower
|
||||||
};
|
};
|
||||||
|
|
||||||
Index kc = std::min<Index>(Blocking::Max_kc/4,depth); // cache block size along the K direction
|
Index kc = depth; // cache block size along the K direction
|
||||||
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
|
Index mc = rows; // cache block size along the M direction
|
||||||
|
Index nc = cols; // cache block size along the N direction
|
||||||
|
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||||
|
// it is better to use smaller blocks along the diagonal
|
||||||
|
kc /= 4;
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
@ -273,7 +281,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|||||||
Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2;
|
Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2;
|
||||||
// size of the triangular part
|
// size of the triangular part
|
||||||
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
|
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
|
||||||
|
|
||||||
Scalar* geb = blockB+ts*ts;
|
Scalar* geb = blockB+ts*ts;
|
||||||
|
|
||||||
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, alpha, actual_kc, rs);
|
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, alpha, actual_kc, rs);
|
||||||
|
@ -63,8 +63,12 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
|
|||||||
IsLower = (Mode&Lower) == Lower
|
IsLower = (Mode&Lower) == Lower
|
||||||
};
|
};
|
||||||
|
|
||||||
Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
|
Index kc = size; // cache block size along the K direction
|
||||||
Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
|
Index mc = size; // cache block size along the M direction
|
||||||
|
Index nc = cols; // cache block size along the N direction
|
||||||
|
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||||
|
// it is better to use smaller blocks along the diagonal
|
||||||
|
kc /= 4;
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
@ -196,8 +200,15 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
|||||||
IsLower = (Mode&Lower) == Lower
|
IsLower = (Mode&Lower) == Lower
|
||||||
};
|
};
|
||||||
|
|
||||||
Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
|
// Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
|
||||||
Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
|
// Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
|
||||||
|
// check that !!!!
|
||||||
|
Index kc = size; // cache block size along the K direction
|
||||||
|
Index mc = size; // cache block size along the M direction
|
||||||
|
Index nc = rows; // cache block size along the N direction
|
||||||
|
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||||
|
// it is better to use smaller blocks along the diagonal
|
||||||
|
kc /= 4;
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
||||||
|
@ -123,7 +123,7 @@ class ei_const_blas_data_mapper
|
|||||||
Index m_stride;
|
Index m_stride;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Defines various constant controlling level 3 blocking
|
// Defines various constants controlling register blocking for matrix-matrix algorithms.
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_product_blocking_traits
|
struct ei_product_blocking_traits
|
||||||
{
|
{
|
||||||
@ -136,13 +136,7 @@ struct ei_product_blocking_traits
|
|||||||
nr = NumberOfRegisters/4,
|
nr = NumberOfRegisters/4,
|
||||||
|
|
||||||
// register block size along the M direction (currently, this one cannot be modified)
|
// register block size along the M direction (currently, this one cannot be modified)
|
||||||
mr = 2 * PacketSize,
|
mr = 2 * PacketSize
|
||||||
|
|
||||||
// max cache block size along the K direction
|
|
||||||
Max_kc = 4 * ei_meta_sqrt<EIGEN_TUNE_FOR_CPU_CACHE_SIZE/(64*sizeof(Scalar))>::ret,
|
|
||||||
|
|
||||||
// max cache block size along the M direction
|
|
||||||
Max_mc = 2*Max_kc
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user