mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables
This commit is contained in:
parent
b6b88c0808
commit
e56aabf205
@ -381,6 +381,7 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/Inverse.h"
|
||||
#include "src/Core/TriangularMatrix.h"
|
||||
#include "src/Core/SelfAdjointView.h"
|
||||
#include "src/Core/products/LookupBlockingSizesTable.h"
|
||||
#include "src/Core/products/GeneralBlockPanelKernel.h"
|
||||
#include "src/Core/products/Parallelizer.h"
|
||||
#include "src/Core/ProductEvaluators.h"
|
||||
|
@ -473,48 +473,48 @@ struct random_default_impl<Scalar, false, false>
|
||||
};
|
||||
|
||||
enum {
|
||||
floor_log2_terminate,
|
||||
floor_log2_move_up,
|
||||
floor_log2_move_down,
|
||||
floor_log2_bogus
|
||||
meta_floor_log2_terminate,
|
||||
meta_floor_log2_move_up,
|
||||
meta_floor_log2_move_down,
|
||||
meta_floor_log2_bogus
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper> struct floor_log2_selector
|
||||
template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
|
||||
{
|
||||
enum { middle = (lower + upper) / 2,
|
||||
value = (upper <= lower + 1) ? int(floor_log2_terminate)
|
||||
: (n < (1 << middle)) ? int(floor_log2_move_down)
|
||||
: (n==0) ? int(floor_log2_bogus)
|
||||
: int(floor_log2_move_up)
|
||||
value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
|
||||
: (n < (1 << middle)) ? int(meta_floor_log2_move_down)
|
||||
: (n==0) ? int(meta_floor_log2_bogus)
|
||||
: int(meta_floor_log2_move_up)
|
||||
};
|
||||
};
|
||||
|
||||
template<unsigned int n,
|
||||
int lower = 0,
|
||||
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
|
||||
int selector = floor_log2_selector<n, lower, upper>::value>
|
||||
struct floor_log2 {};
|
||||
int selector = meta_floor_log2_selector<n, lower, upper>::value>
|
||||
struct meta_floor_log2 {};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_move_down>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
|
||||
{
|
||||
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
|
||||
enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_move_up>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
|
||||
{
|
||||
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
|
||||
enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_terminate>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
|
||||
{
|
||||
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
|
||||
};
|
||||
|
||||
template<unsigned int n, int lower, int upper>
|
||||
struct floor_log2<n, lower, upper, floor_log2_bogus>
|
||||
struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
|
||||
{
|
||||
// no value, error at compile time
|
||||
};
|
||||
@ -551,7 +551,7 @@ struct random_default_impl<Scalar, false, true>
|
||||
#ifdef EIGEN_MAKING_DOCS
|
||||
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
|
||||
#else
|
||||
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
|
||||
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
|
||||
scalar_bits = sizeof(Scalar) * CHAR_BIT,
|
||||
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
|
||||
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
|
||||
|
@ -74,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
|
||||
*
|
||||
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
|
||||
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
|
||||
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
|
||||
*
|
||||
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
|
||||
* this function computes the blocking size parameters along the respective dimensions
|
||||
* for matrix products and related algorithms. The blocking sizes depends on various
|
||||
* parameters:
|
||||
* - the L1 and L2 cache sizes,
|
||||
* - the register level blocking sizes defined by gebp_traits,
|
||||
* - the number of scalars that fit into a packet (when vectorization is enabled).
|
||||
*
|
||||
* \sa setCpuCacheSizes */
|
||||
/* Helper for computeProductBlockingSizes.
|
||||
*
|
||||
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
|
||||
* this function computes the blocking size parameters along the respective dimensions
|
||||
* for matrix products and related algorithms. The blocking sizes depends on various
|
||||
* parameters:
|
||||
* - the L1 and L2 cache sizes,
|
||||
* - the register level blocking sizes defined by gebp_traits,
|
||||
* - the number of scalars that fit into a packet (when vectorization is enabled).
|
||||
*
|
||||
* \sa setCpuCacheSizes */
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar, int KcFactor>
|
||||
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
|
||||
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
|
||||
{
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
|
||||
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
|
||||
EIGEN_UNUSED_VARIABLE(num_threads);
|
||||
enum {
|
||||
kr = 8,
|
||||
mr = Traits::mr,
|
||||
nr = Traits::nr
|
||||
};
|
||||
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
|
||||
if (k > kr) k -= k % kr;
|
||||
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
|
||||
if (m > mr) m -= m % mr;
|
||||
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
|
||||
if (n > nr) n -= n % nr;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Explanations:
|
||||
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
|
||||
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
|
||||
@ -281,6 +259,60 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
|
||||
}
|
||||
}
|
||||
|
||||
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
|
||||
{
|
||||
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
|
||||
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
|
||||
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
|
||||
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
|
||||
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
EIGEN_UNUSED_VARIABLE(k)
|
||||
EIGEN_UNUSED_VARIABLE(m)
|
||||
EIGEN_UNUSED_VARIABLE(n)
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
|
||||
*
|
||||
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
|
||||
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
|
||||
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
|
||||
*
|
||||
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
|
||||
* this function computes the blocking size parameters along the respective dimensions
|
||||
* for matrix products and related algorithms.
|
||||
*
|
||||
* The blocking size parameters may be evaluated:
|
||||
* - either by a heuristic based on cache sizes;
|
||||
* - or using a precomputed lookup table;
|
||||
* - or using fixed prescribed values (for testing purposes).
|
||||
*
|
||||
* \sa setCpuCacheSizes */
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar, int KcFactor>
|
||||
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
|
||||
{
|
||||
if (!useSpecificBlockingSizes(k, m, n)) {
|
||||
if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) {
|
||||
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
|
||||
}
|
||||
}
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
enum {
|
||||
kr = 8,
|
||||
mr = Traits::mr,
|
||||
nr = Traits::nr
|
||||
};
|
||||
if (k > kr) k -= k % kr;
|
||||
if (m > mr) m -= m % mr;
|
||||
if (n > nr) n -= n % nr;
|
||||
}
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
|
||||
{
|
||||
|
89
Eigen/src/Core/products/LookupBlockingSizesTable.h
Normal file
89
Eigen/src/Core/products/LookupBlockingSizesTable.h
Normal file
@ -0,0 +1,89 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
|
||||
#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <typename LhsScalar,
|
||||
typename RhsScalar,
|
||||
bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 >
|
||||
struct LookupBlockingSizesFromTableImpl
|
||||
{
|
||||
static bool run(Index&, Index&, Index&, Index)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
inline uint8_t floor_log2_helper(uint16_t& x, size_t offset)
|
||||
{
|
||||
uint16_t y = x >> offset;
|
||||
if (y) {
|
||||
x = y;
|
||||
return offset;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline uint8_t floor_log2(uint16_t x)
|
||||
{
|
||||
return floor_log2_helper(x, 8)
|
||||
+ floor_log2_helper(x, 4)
|
||||
+ floor_log2_helper(x, 2)
|
||||
+ floor_log2_helper(x, 1);
|
||||
}
|
||||
|
||||
inline uint8_t ceil_log2(uint16_t x)
|
||||
{
|
||||
return x > 1 ? floor_log2(x - 1) + 1 : 0;
|
||||
}
|
||||
|
||||
template <typename LhsScalar,
|
||||
typename RhsScalar>
|
||||
struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true>
|
||||
{
|
||||
static bool run(Index& k, Index& m, Index& n, Index)
|
||||
{
|
||||
using std::min;
|
||||
using std::max;
|
||||
typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
|
||||
const uint16_t minsize = Table::BaseSize;
|
||||
const uint16_t maxsize = minsize << (Table::NumSizes + 1);
|
||||
const uint16_t k_clamped = max<uint16_t>(minsize, min<Index>(k, maxsize));
|
||||
const uint16_t m_clamped = max<uint16_t>(minsize, min<Index>(m, maxsize));
|
||||
const uint16_t n_clamped = max<uint16_t>(minsize, min<Index>(n, maxsize));
|
||||
const size_t k_index = ceil_log2(k_clamped / minsize);
|
||||
const size_t m_index = ceil_log2(m_clamped / minsize);
|
||||
const size_t n_index = ceil_log2(n_clamped / minsize);
|
||||
const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
|
||||
const uint16_t table_entry = Table::Data()[index];
|
||||
k = min(k, 1 << ((table_entry & 0xf00) >> 8));
|
||||
m = min(m, 1 << ((table_entry & 0x0f0) >> 4));
|
||||
n = min(n, 1 << ((table_entry & 0x00f) >> 0));
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename LhsScalar,
|
||||
typename RhsScalar>
|
||||
bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads)
|
||||
{
|
||||
return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
|
@ -287,6 +287,14 @@ struct stem_function
|
||||
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
|
||||
typedef ComplexScalar type(ComplexScalar, int);
|
||||
};
|
||||
|
||||
template <typename LhsScalar,
|
||||
typename RhsScalar>
|
||||
struct BlockingSizesLookupTable
|
||||
{
|
||||
static const size_t NumSizes = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
Loading…
Reference in New Issue
Block a user