Fix performance regression introduced in changeset e56aabf205.

Register blocking sizes are better handled by the cache size heuristics.
The current code introduced very small blocks, for instance for a 9x9 matrix,
thus killing performance.
This commit is contained in:
Gael Guennebaud 2016-07-02 15:40:56 +02:00
parent d161b8f03a
commit 672076db5d

View File

@ -299,16 +299,6 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
if (!useSpecificBlockingSizes(k, m, n)) {
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
}
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
if (k > kr) k -= k % kr;
if (m > mr) m -= m % mr;
if (n > nr) n -= n % nr;
}
template<typename LhsScalar, typename RhsScalar, typename Index>