From e56aabf205a1e8f581dd8a46d7d46ce79c45e158 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sun, 15 Mar 2015 18:05:12 -0400 Subject: [PATCH] Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables --- Eigen/Core | 1 + Eigen/src/Core/MathFunctions.h | 36 +++---- .../Core/products/GeneralBlockPanelKernel.h | 100 ++++++++++++------ .../Core/products/LookupBlockingSizesTable.h | 89 ++++++++++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 8 ++ 5 files changed, 182 insertions(+), 52 deletions(-) create mode 100644 Eigen/src/Core/products/LookupBlockingSizesTable.h diff --git a/Eigen/Core b/Eigen/Core index 0b8eaa61c..138c34916 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -381,6 +381,7 @@ using std::ptrdiff_t; #include "src/Core/Inverse.h" #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" +#include "src/Core/products/LookupBlockingSizesTable.h" #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/Parallelizer.h" #include "src/Core/ProductEvaluators.h" diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 3c76a58b9..0fde5c71e 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -473,48 +473,48 @@ struct random_default_impl }; enum { - floor_log2_terminate, - floor_log2_move_up, - floor_log2_move_down, - floor_log2_bogus + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus }; -template struct floor_log2_selector +template struct meta_floor_log2_selector { enum { middle = (lower + upper) / 2, - value = (upper <= lower + 1) ? int(floor_log2_terminate) - : (n < (1 << middle)) ? int(floor_log2_move_down) - : (n==0) ? int(floor_log2_bogus) - : int(floor_log2_move_up) + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) }; }; template::value> -struct floor_log2 {}; + int selector = meta_floor_log2_selector::value> +struct meta_floor_log2 {}; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle>::value }; + enum { value = meta_floor_log2::middle>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle, upper>::value }; + enum { value = meta_floor_log2::middle, upper>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; }; template -struct floor_log2 +struct meta_floor_log2 { // no value, error at compile time }; @@ -551,7 +551,7 @@ struct random_default_impl #ifdef EIGEN_MAKING_DOCS return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); #else - enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value, + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, scalar_bits = sizeof(Scalar) * CHAR_BIT, shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), offset = NumTraits::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 1b47f1a6d..617439ff6 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -74,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff } } -/** \brief Computes the blocking parameters for a m x k times k x n matrix product - * - * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. - * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. - * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. - * - * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, - * this function computes the blocking size parameters along the respective dimensions - * for matrix products and related algorithms. The blocking sizes depends on various - * parameters: - * - the L1 and L2 cache sizes, - * - the register level blocking sizes defined by gebp_traits, - * - the number of scalars that fit into a packet (when vectorization is enabled). - * - * \sa setCpuCacheSizes */ +/* Helper for computeProductBlockingSizes. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. The blocking sizes depends on various + * parameters: + * - the L1 and L2 cache sizes, + * - the register level blocking sizes defined by gebp_traits, + * - the number of scalars that fit into a packet (when vectorization is enabled). + * + * \sa setCpuCacheSizes */ template -void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1) { typedef gebp_traits Traits; -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES - if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { - EIGEN_UNUSED_VARIABLE(num_threads); - enum { - kr = 8, - mr = Traits::mr, - nr = Traits::nr - }; - k = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); - if (k > kr) k -= k % kr; - m = std::min(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); - if (m > mr) m -= m % mr; - n = std::min(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); - if (n > nr) n -= n % nr; - return; - } -#endif - // Explanations: // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed @@ -281,6 +259,60 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads } } +inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n) +{ +#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { + k = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); + m = std::min(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); + n = std::min(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); + return true; + } +#else + EIGEN_UNUSED_VARIABLE(k) + EIGEN_UNUSED_VARIABLE(m) + EIGEN_UNUSED_VARIABLE(n) + return false; +#endif +} + +/** \brief Computes the blocking parameters for a m x k times k x n matrix product + * + * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. + * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. + * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. + * + * The blocking size parameters may be evaluated: + * - either by a heuristic based on cache sizes; + * - or using a precomputed lookup table; + * - or using fixed prescribed values (for testing purposes). + * + * \sa setCpuCacheSizes */ + +template +void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +{ + if (!useSpecificBlockingSizes(k, m, n)) { + if (!lookupBlockingSizesFromTable(k, m, n, num_threads)) { + evaluateProductBlockingSizesHeuristic(k, m, n, num_threads); + } + } + + typedef gebp_traits Traits; + enum { + kr = 8, + mr = Traits::mr, + nr = Traits::nr + }; + if (k > kr) k -= k % kr; + if (m > mr) m -= m % mr; + if (n > nr) n -= n % nr; +} + template inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) { diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h new file mode 100644 index 000000000..85aeedec8 --- /dev/null +++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h @@ -0,0 +1,89 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H +#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H + +namespace Eigen { + +namespace internal { + +template ::NumSizes != 0 > +struct LookupBlockingSizesFromTableImpl +{ + static bool run(Index&, Index&, Index&, Index) + { + return false; + } +}; + +inline uint8_t floor_log2_helper(uint16_t& x, size_t offset) +{ + uint16_t y = x >> offset; + if (y) { + x = y; + return offset; + } else { + return 0; + } +} + +inline uint8_t floor_log2(uint16_t x) +{ + return floor_log2_helper(x, 8) + + floor_log2_helper(x, 4) + + floor_log2_helper(x, 2) + + floor_log2_helper(x, 1); +} + +inline uint8_t ceil_log2(uint16_t x) +{ + return x > 1 ? floor_log2(x - 1) + 1 : 0; +} + +template +struct LookupBlockingSizesFromTableImpl +{ + static bool run(Index& k, Index& m, Index& n, Index) + { + using std::min; + using std::max; + typedef BlockingSizesLookupTable Table; + const uint16_t minsize = Table::BaseSize; + const uint16_t maxsize = minsize << (Table::NumSizes + 1); + const uint16_t k_clamped = max(minsize, min(k, maxsize)); + const uint16_t m_clamped = max(minsize, min(m, maxsize)); + const uint16_t n_clamped = max(minsize, min(n, maxsize)); + const size_t k_index = ceil_log2(k_clamped / minsize); + const size_t m_index = ceil_log2(m_clamped / minsize); + const size_t n_index = ceil_log2(n_clamped / minsize); + const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index); + const uint16_t table_entry = Table::Data()[index]; + k = min(k, 1 << ((table_entry & 0xf00) >> 8)); + m = min(m, 1 << ((table_entry & 0x0f0) >> 4)); + n = min(n, 1 << ((table_entry & 0x00f) >> 0)); + return true; + } +}; + +template +bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads) +{ + return LookupBlockingSizesFromTableImpl::run(k, m, n, num_threads); +} + +} + +} + +#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index c23892c50..8034f9b5e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -287,6 +287,14 @@ struct stem_function typedef std::complex::Real> ComplexScalar; typedef ComplexScalar type(ComplexScalar, int); }; + +template +struct BlockingSizesLookupTable +{ + static const size_t NumSizes = 0; +}; + } } // end namespace Eigen