mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Fix performance regression introduced in changeset e56aabf205.
Register blocking sizes are better handled by the cache size heuristics. The current code introduced very small blocks, for instance for a 9x9 matrix, thus killing performance.
This commit is contained in:
@@ -299,16 +299,6 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
|
||||
if (!useSpecificBlockingSizes(k, m, n)) {
|
||||
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
|
||||
}
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
enum {
|
||||
kr = 8,
|
||||
mr = Traits::mr,
|
||||
nr = Traits::nr
|
||||
};
|
||||
if (k > kr) k -= k % kr;
|
||||
if (m > mr) m -= m % mr;
|
||||
if (n > nr) n -= n % nr;
|
||||
}
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar, typename Index>
|
||||
|
||||
Reference in New Issue
Block a user