mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-23 18:20:47 +08:00
make the cache size mechanism future proof by adding level 2 parameters
This commit is contained in:
parent
986f65c402
commit
842b54fe80
@ -26,53 +26,66 @@
|
||||
#define EIGEN_GENERAL_BLOCK_PANEL_H
|
||||
|
||||
/** \internal */
|
||||
inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* a=0, std::ptrdiff_t* b=0, int scalar_size = 0)
|
||||
inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* a=0, std::ptrdiff_t* b=0, std::ptrdiff_t* c=0, int scalar_size = 0)
|
||||
{
|
||||
const int nbScalarSizes = 12;
|
||||
static std::ptrdiff_t m_maxK[nbScalarSizes];
|
||||
static std::ptrdiff_t m_maxM[nbScalarSizes];
|
||||
static std::ptrdiff_t m_cpuCacheSize = 0;
|
||||
if(m_cpuCacheSize==0)
|
||||
static std::ptrdiff_t m_maxN[nbScalarSizes];
|
||||
static std::ptrdiff_t m_l1CacheSize = 0;
|
||||
static std::ptrdiff_t m_l2CacheSize = 0;
|
||||
if(m_l1CacheSize==0)
|
||||
{
|
||||
// initialization
|
||||
m_cpuCacheSize = EIGEN_TUNE_FOR_CPU_CACHE_SIZE;
|
||||
ei_manage_caching_sizes(SetAction,&m_cpuCacheSize);
|
||||
m_l1CacheSize = EIGEN_TUNE_FOR_CPU_CACHE_SIZE;
|
||||
m_l2CacheSize = 32*EIGEN_TUNE_FOR_CPU_CACHE_SIZE;
|
||||
ei_manage_caching_sizes(SetAction,&m_l1CacheSize, &m_l2CacheSize);
|
||||
}
|
||||
|
||||
if(action==SetAction && scalar_size==0)
|
||||
{
|
||||
// set the cpu cache size and cache all block sizes from a global cache size in byte
|
||||
ei_internal_assert(a!=0 && b==0);
|
||||
m_cpuCacheSize = *a;
|
||||
ei_internal_assert(a!=0 && b!=0 && c==0);
|
||||
m_l1CacheSize = *a;
|
||||
m_l2CacheSize = *b;
|
||||
int ss = 4;
|
||||
for(int i=0; i<nbScalarSizes;++i,ss+=4)
|
||||
{
|
||||
m_maxK[i] = 4 * std::ptrdiff_t(ei_sqrt<float>(m_cpuCacheSize/(64*ss)));
|
||||
// Round the block size such that it is a multiple of 64/ss.
|
||||
// This is to make sure the block size are multiple of the register block sizes.
|
||||
// And in the worst case we ensure an even number.
|
||||
std::ptrdiff_t rb = 64/ss;
|
||||
if(rb==0) rb = 1;
|
||||
m_maxK[i] = 4 * std::ptrdiff_t(ei_sqrt<float>(m_l1CacheSize/(64*ss)));
|
||||
m_maxM[i] = 2 * m_maxK[i];
|
||||
m_maxN[i] = ((m_l2CacheSize / (2 * m_maxK[i] * ss))/4)*4;
|
||||
}
|
||||
}
|
||||
else if(action==SetAction && scalar_size!=0)
|
||||
{
|
||||
// set the block sizes for the given scalar type (represented as its size)
|
||||
ei_internal_assert(a!=0 && b!=0);
|
||||
ei_internal_assert(a!=0 && b!=0 && c!=0);
|
||||
int i = std::max((scalar_size>>2)-1,0);
|
||||
if(i<nbScalarSizes)
|
||||
{
|
||||
m_maxK[i] = *a;
|
||||
m_maxM[i] = *b;
|
||||
m_maxN[i] = *c;
|
||||
}
|
||||
}
|
||||
else if(action==GetAction && scalar_size==0)
|
||||
{
|
||||
ei_internal_assert(a!=0 && b==0);
|
||||
*a = m_cpuCacheSize;
|
||||
ei_internal_assert(a!=0 && b!=0 && c==0);
|
||||
*a = m_l1CacheSize;
|
||||
*b = m_l2CacheSize;
|
||||
}
|
||||
else if(action==GetAction && scalar_size!=0)
|
||||
{
|
||||
ei_internal_assert(a!=0 && b!=0);
|
||||
ei_internal_assert(a!=0 && b!=0 && c!=0);
|
||||
int i = std::min(std::max((scalar_size>>2),1),nbScalarSizes)-1;
|
||||
*a = m_maxK[i];
|
||||
*b = m_maxM[i];
|
||||
*c = m_maxN[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -80,53 +93,82 @@ inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* a=0, std::ptr
|
||||
}
|
||||
}
|
||||
|
||||
/** \returns the currently set cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
|
||||
* \sa setL1CacheSize */
|
||||
/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
|
||||
* \sa setCpuCacheSize */
|
||||
inline std::ptrdiff_t l1CacheSize()
|
||||
{
|
||||
std::ptrdiff_t ret;
|
||||
ei_manage_caching_sizes(GetAction, &ret);
|
||||
return ret;
|
||||
std::ptrdiff_t l1, l2;
|
||||
ei_manage_caching_sizes(GetAction, &l1, &l2);
|
||||
return l1;
|
||||
}
|
||||
|
||||
/** Set the cpu cache size (in bytes) for blocking.
|
||||
* This function also automatically set the blocking size parameters
|
||||
* for each scalar type using the following formula:
|
||||
* \code
|
||||
* max_k = 4 * sqrt(cache_size/(64*sizeof(Scalar)));
|
||||
* max_m = 2 * k;
|
||||
* \endcode
|
||||
* overwriting custom values set using the ei_setBlockingSizes function.
|
||||
*
|
||||
* \b Explanations: \n
|
||||
* Let A * B be a m x k times k x n matrix product. Then Eigen's product yield
|
||||
* L2 blocking on B with panels of size max_k x n, and L1 blocking on A,
|
||||
* with blocks of size max_m x max_k.
|
||||
*
|
||||
* \sa ei_setBlockingSizes */
|
||||
inline void setL1CacheSize(std::ptrdiff_t cache_size) { ei_manage_caching_sizes(SetAction,&cache_size); }
|
||||
|
||||
/** Set the blocking size parameters \a maxK and \a maxM for the scalar type \a Scalar.
|
||||
* Note that in practice there is no distinction between scalar types of same size.
|
||||
*
|
||||
* See ei_setCpuCacheSize for an explanation about the meaning of maxK and maxM.
|
||||
*
|
||||
* \sa setL1CacheSize */
|
||||
template<typename Scalar>
|
||||
void setBlockingSizes(std::ptrdiff_t maxK, std::ptrdiff_t maxM)
|
||||
/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
|
||||
* \sa setCpuCacheSize */
|
||||
inline std::ptrdiff_t l2CacheSize()
|
||||
{
|
||||
ei_manage_caching_sizes(SetAction,&maxK,&maxM,sizeof(Scalar));
|
||||
std::ptrdiff_t l1, l2;
|
||||
ei_manage_caching_sizes(GetAction, &l1, &l2);
|
||||
return l2;
|
||||
}
|
||||
|
||||
/** \returns in \a makK, \a maxM the blocking size parameters for the scalar type \a Scalar.
|
||||
/** Set the cpu L1 and L2 cache sizes (in bytes).
|
||||
* These values are use to adjust the size of the blocks
|
||||
* for the algorithms working per blocks.
|
||||
*
|
||||
* See ei_setCpuCacheSize for an explanation about the meaning of maxK and maxM.
|
||||
*
|
||||
* \sa setL1CacheSize */
|
||||
template<typename Scalar>
|
||||
void getBlockingSizes(std::ptrdiff_t& maxK, std::ptrdiff_t& maxM)
|
||||
* This function also automatically set the blocking size parameters
|
||||
* for each scalar type using the following rules:
|
||||
* \code
|
||||
* max_k = 4 * sqrt(l1/(64*sizeof(Scalar)));
|
||||
* max_m = 2 * k;
|
||||
* max_n = l2/(2*max_k*sizeof(Scalar));
|
||||
* \endcode
|
||||
* overwriting custom values set using the setBlockingSizes function.
|
||||
*
|
||||
* See setBlockingSizes() for an explanation about the meaning of these parameters.
|
||||
*
|
||||
* \sa setBlockingSizes */
|
||||
inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
|
||||
{
|
||||
ei_manage_caching_sizes(GetAction,&maxK,&maxM,sizeof(Scalar));
|
||||
ei_manage_caching_sizes(SetAction, &l1, &l2);
|
||||
}
|
||||
|
||||
/** \brief Set the blocking size parameters \a maxK, \a maxM and \a maxN for the scalar type \a Scalar.
|
||||
*
|
||||
* \param[in] maxK the size of the L1 and L2 blocks along the k dimension
|
||||
* \param[in] maxM the size of the L1 blocks along the m dimension
|
||||
* \param[in] maxN the size of the L2 blocks along the n dimension
|
||||
*
|
||||
* This function sets the blocking size parameters for matrix products and related algorithms.
|
||||
* More precisely, let A * B be a m x k by k x n matrix product. Then Eigen's product like
|
||||
* algorithms perform L2 blocking on B with horizontal panels of size maxK x maxN,
|
||||
* and L1 blocking on A with blocks of size maxM x maxK.
|
||||
*
|
||||
* Theoretically, for best performances maxM should be closed to maxK and maxM * maxK should
|
||||
* note exceed half of the L1 cache. Likewise, maxK * maxM should be smaller than the L2 cache.
|
||||
*
|
||||
* Note that in practice there is no distinction between scalar types of same size.
|
||||
*
|
||||
* \sa setCpuCacheSizes */
|
||||
template<typename Scalar>
|
||||
void setBlockingSizes(std::ptrdiff_t maxK, std::ptrdiff_t maxM, std::ptrdiff_t maxN)
|
||||
{
|
||||
std::ptrdiff_t k, m, n;
|
||||
typedef ei_product_blocking_traits<Scalar> Traits;
|
||||
k = ((maxK)/4)*4;
|
||||
m = ((maxM)/Traits::mr)*Traits::mr;
|
||||
n = ((maxN)/Traits::nr)*Traits::nr;
|
||||
ei_manage_caching_sizes(SetAction,&k,&m,&n,sizeof(Scalar));
|
||||
}
|
||||
|
||||
/** \returns in \a makK, \a maxM and \a maxN the blocking size parameters for the scalar type \a Scalar.
|
||||
*
|
||||
* See setBlockingSizes for an explanation about the meaning of these parameters.
|
||||
*
|
||||
* \sa setBlockingSizes */
|
||||
template<typename Scalar>
|
||||
void getBlockingSizes(std::ptrdiff_t& maxK, std::ptrdiff_t& maxM, std::ptrdiff_t& maxN)
|
||||
{
|
||||
ei_manage_caching_sizes(GetAction,&maxK,&maxM,&maxN,sizeof(Scalar));
|
||||
}
|
||||
|
||||
#ifdef EIGEN_HAS_FUSE_CJMADD
|
||||
|
@ -77,9 +77,11 @@ static void run(Index rows, Index cols, Index depth,
|
||||
|
||||
Index kc; // cache block size along the K direction
|
||||
Index mc; // cache block size along the M direction
|
||||
getBlockingSizes<Scalar>(kc, mc);
|
||||
Index nc; // cache block size along the N direction
|
||||
getBlockingSizes<Scalar>(kc, mc, nc);
|
||||
kc = std::min<Index>(kc,depth);
|
||||
mc = std::min<Index>(mc,rows);
|
||||
nc = std::min<Index>(nc,cols);
|
||||
|
||||
ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs;
|
||||
ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs;
|
||||
|
@ -49,5 +49,22 @@ void test_product_large()
|
||||
MatrixXf a = MatrixXf::Random(10,4), b = MatrixXf::Random(4,10), c = a;
|
||||
VERIFY_IS_APPROX((a = a * b), (c * b).eval());
|
||||
}
|
||||
|
||||
{
|
||||
// check the functions to setup blocking sizes compile and do not segfault
|
||||
// FIXME check they do what they are supposed to do !!
|
||||
std::ptrdiff_t l1 = ei_random<int>(10000,20000);
|
||||
std::ptrdiff_t l2 = ei_random<int>(1000000,2000000);
|
||||
setCpuCacheSizes(l1,l2);
|
||||
VERIFY(l1==l1CacheSize());
|
||||
VERIFY(l2==l2CacheSize());
|
||||
std::ptrdiff_t k1 = ei_random<int>(10,100)*16;
|
||||
std::ptrdiff_t m1 = ei_random<int>(10,100)*16;
|
||||
std::ptrdiff_t n1 = ei_random<int>(10,100)*16;
|
||||
setBlockingSizes<float>(k1,m1,n1);
|
||||
std::ptrdiff_t k, m, n;
|
||||
getBlockingSizes<float>(k,m,n);
|
||||
VERIFY(k==k1 && m==m1 && n==n1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user