Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables

This commit is contained in:
Benoit Jacob 2015-03-15 18:05:12 -04:00
parent b6b88c0808
commit e56aabf205
5 changed files with 182 additions and 52 deletions

View File

@ -381,6 +381,7 @@ using std::ptrdiff_t;
#include "src/Core/Inverse.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/products/LookupBlockingSizesTable.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
#include "src/Core/ProductEvaluators.h"

View File

@ -473,48 +473,48 @@ struct random_default_impl<Scalar, false, false>
};
enum {
floor_log2_terminate,
floor_log2_move_up,
floor_log2_move_down,
floor_log2_bogus
meta_floor_log2_terminate,
meta_floor_log2_move_up,
meta_floor_log2_move_down,
meta_floor_log2_bogus
};
template<unsigned int n, int lower, int upper> struct floor_log2_selector
template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
{
enum { middle = (lower + upper) / 2,
value = (upper <= lower + 1) ? int(floor_log2_terminate)
: (n < (1 << middle)) ? int(floor_log2_move_down)
: (n==0) ? int(floor_log2_bogus)
: int(floor_log2_move_up)
value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
: (n < (1 << middle)) ? int(meta_floor_log2_move_down)
: (n==0) ? int(meta_floor_log2_bogus)
: int(meta_floor_log2_move_up)
};
};
template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
int selector = floor_log2_selector<n, lower, upper>::value>
struct floor_log2 {};
int selector = meta_floor_log2_selector<n, lower, upper>::value>
struct meta_floor_log2 {};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_down>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
{
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_up>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
{
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_terminate>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_bogus>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
{
// no value, error at compile time
};
@ -551,7 +551,7 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0

View File

@ -74,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
}
}
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depends on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
/* Helper for computeProductBlockingSizes.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depends on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
EIGEN_UNUSED_VARIABLE(num_threads);
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
if (k > kr) k -= k % kr;
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
if (m > mr) m -= m % mr;
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
if (n > nr) n -= n % nr;
return;
}
#endif
// Explanations:
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
@ -281,6 +259,60 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
}
}
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
{
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
return true;
}
#else
EIGEN_UNUSED_VARIABLE(k)
EIGEN_UNUSED_VARIABLE(m)
EIGEN_UNUSED_VARIABLE(n)
return false;
#endif
}
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms.
*
* The blocking size parameters may be evaluated:
* - either by a heuristic based on cache sizes;
* - or using a precomputed lookup table;
* - or using fixed prescribed values (for testing purposes).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
if (!useSpecificBlockingSizes(k, m, n)) {
if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) {
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
}
}
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
if (k > kr) k -= k % kr;
if (m > mr) m -= m % mr;
if (n > nr) n -= n % nr;
}
template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{

View File

@ -0,0 +1,89 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
namespace Eigen {
namespace internal {
template <typename LhsScalar,
typename RhsScalar,
bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 >
struct LookupBlockingSizesFromTableImpl
{
static bool run(Index&, Index&, Index&, Index)
{
return false;
}
};
inline uint8_t floor_log2_helper(uint16_t& x, size_t offset)
{
uint16_t y = x >> offset;
if (y) {
x = y;
return offset;
} else {
return 0;
}
}
inline uint8_t floor_log2(uint16_t x)
{
return floor_log2_helper(x, 8)
+ floor_log2_helper(x, 4)
+ floor_log2_helper(x, 2)
+ floor_log2_helper(x, 1);
}
inline uint8_t ceil_log2(uint16_t x)
{
return x > 1 ? floor_log2(x - 1) + 1 : 0;
}
template <typename LhsScalar,
typename RhsScalar>
struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true>
{
static bool run(Index& k, Index& m, Index& n, Index)
{
using std::min;
using std::max;
typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
const uint16_t minsize = Table::BaseSize;
const uint16_t maxsize = minsize << (Table::NumSizes + 1);
const uint16_t k_clamped = max<uint16_t>(minsize, min<Index>(k, maxsize));
const uint16_t m_clamped = max<uint16_t>(minsize, min<Index>(m, maxsize));
const uint16_t n_clamped = max<uint16_t>(minsize, min<Index>(n, maxsize));
const size_t k_index = ceil_log2(k_clamped / minsize);
const size_t m_index = ceil_log2(m_clamped / minsize);
const size_t n_index = ceil_log2(n_clamped / minsize);
const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
const uint16_t table_entry = Table::Data()[index];
k = min(k, 1 << ((table_entry & 0xf00) >> 8));
m = min(m, 1 << ((table_entry & 0x0f0) >> 4));
n = min(n, 1 << ((table_entry & 0x00f) >> 0));
return true;
}
};
template <typename LhsScalar,
typename RhsScalar>
bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads)
{
return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads);
}
}
}
#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H

View File

@ -287,6 +287,14 @@ struct stem_function
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};
template <typename LhsScalar,
typename RhsScalar>
struct BlockingSizesLookupTable
{
static const size_t NumSizes = 0;
};
}
} // end namespace Eigen