Improve product test coverage at critical code-path boundaries

libeigen/eigen!2285

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
Rasmus Munk Larsen
2026-03-12 12:32:06 -07:00
parent 3a2ba7c434
commit 5e478d3285
4 changed files with 287 additions and 1 deletion

View File: test/product_extra.cpp

@@ -252,6 +252,7 @@ EIGEN_DONT_INLINE Index test_compute_block_size(Index m, Index n, Index k) {
template <typename T>
Index compute_block_size() {
Index ret = 0;
// Zero-sized inputs: verify they compile and don't crash.
ret += test_compute_block_size<T>(0, 1, 1);
ret += test_compute_block_size<T>(1, 0, 1);
ret += test_compute_block_size<T>(1, 1, 0);
@@ -259,9 +260,58 @@ Index compute_block_size() {
ret += test_compute_block_size<T>(0, 1, 0);
ret += test_compute_block_size<T>(1, 0, 0);
ret += test_compute_block_size<T>(0, 0, 0);
// Sanity checks: blocking sizes must be positive and not exceed the original.
{
Index m = 200, n = 200, k = 200;
Index mc = m, nc = n, kc = k;
internal::computeProductBlockingSizes<T, T>(kc, mc, nc);
VERIFY(kc > 0 && kc <= k);
VERIFY(mc > 0 && mc <= m);
VERIFY(nc > 0 && nc <= n);
}
// With EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS (l1=9KB, l2=32KB, l3=512KB),
// large sizes must be actually blocked (not returned as-is).
{
Index m = 500, n = 500, k = 500;
Index mc = m, nc = n, kc = k;
internal::computeProductBlockingSizes<T, T>(kc, mc, nc);
VERIFY(kc < k);
}
return ret;
}
// Verify correctness of GEMM at sizes that require multiple blocking passes
// under EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS (l1=9KB, l2=32KB, l3=512KB).
// The blocking early-return threshold is max(k,m,n) < 48, so sizes >= 48
// trigger actual multi-pass blocking with these tiny cache sizes.
// Verifies GEMM against column-by-column GEMV (a different code path).
template <int>
void test_small_block_correctness() {
const int sizes[] = {48, 64, 96, 128, 200};
for (int si = 0; si < 5; ++si) {
int n = sizes[si];
MatrixXd A = MatrixXd::Random(n, n);
MatrixXd B = MatrixXd::Random(n, n);
MatrixXd C(n, n);
C.noalias() = A * B;
MatrixXd Cref(n, n);
for (int j = 0; j < n; ++j) Cref.col(j) = A * B.col(j);
VERIFY_IS_APPROX(C, Cref);
}
// Non-square: exercise different blocking in m, n, k dimensions.
{
MatrixXd A = MatrixXd::Random(200, 64);
MatrixXd B = MatrixXd::Random(64, 300);
MatrixXd C(200, 300);
C.noalias() = A * B;
MatrixXd Cref(200, 300);
for (int j = 0; j < 300; ++j) Cref.col(j) = A * B.col(j);
VERIFY_IS_APPROX(C, Cref);
}
}
template <typename>
void aliasing_with_resize() {
Index m = internal::random<Index>(10, 50);
@@ -542,4 +592,5 @@ EIGEN_DECLARE_TEST(product_extra) {
CALL_SUBTEST_7(compute_block_size<std::complex<double> >());
CALL_SUBTEST_8(aliasing_with_resize<void>());
CALL_SUBTEST_9(product_custom_scalar_types<0>());
CALL_SUBTEST_10(test_small_block_correctness<0>());
}
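
For context on the blocking query these subtests exercise, here is a minimal standalone sketch (illustrative only, not part of this change) of how internal::computeProductBlockingSizes reduces its in-out arguments; it assumes the program is compiled with -DEIGEN_DEBUG_SMALL_PRODUCT_BLOCKS so the tiny l1/l2/l3 sizes mentioned in the comments above are in effect.

#include <iostream>
#include <Eigen/Dense>

int main() {
  // kc/mc/nc start as the full GEMM dimensions and are clamped in place to the
  // block sizes chosen for the (artificially small) cache levels.
  Eigen::Index kc = 500, mc = 500, nc = 500;
  Eigen::internal::computeProductBlockingSizes<double, double>(kc, mc, nc);
  std::cout << "kc=" << kc << " mc=" << mc << " nc=" << nc << "\n";
  return 0;
}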

View File: test/product_large.cpp

@@ -152,6 +152,159 @@ void bug_gemv_run_small_cols() {
VERIFY_IS_APPROX(y, y_ref);
}
// Systematic test of row-major GEMV run_small_cols and main run() remainder paths.
// Varies cols from 1-7 (covers float PacketSize=8 and double PacketSize=4 boundaries)
// and rows across values that exercise all n8/n4/n2/n1 remainder combinations.
template <int>
void gemv_small_cols_systematic() {
const int test_cols[] = {1, 2, 3, 4, 5, 6, 7};
const int test_rows[] = {1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 15, 16, 17, 25};
// Large stride forces n8=0, exercising all remainder-only paths.
{
const int stride = 5000; // 5000 * sizeof(double) = 40000 > 32000
for (int ci = 0; ci < 7; ++ci) {
for (int ri = 0; ri < 14; ++ri) {
int rows = test_rows[ri], cols = test_cols[ci];
Matrix<double, Dynamic, Dynamic, RowMajor> A_full(rows, stride);
A_full.setRandom();
auto A = A_full.leftCols(cols);
VectorXd x = VectorXd::Random(cols);
VectorXd y = A * x;
VectorXd y_ref = VectorXd::Zero(rows);
for (int i = 0; i < rows; ++i)
for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
VERIFY_IS_APPROX(y, y_ref);
}
}
}
// Normal stride (n8 active) to cover the 8-row main loop + remainders.
for (int ci = 0; ci < 7; ++ci) {
for (int ri = 0; ri < 14; ++ri) {
int rows = test_rows[ri], cols = test_cols[ci];
Matrix<double, Dynamic, Dynamic, RowMajor> A(rows, cols);
A.setRandom();
VectorXd x = VectorXd::Random(cols);
VectorXd y = A * x;
VectorXd y_ref = VectorXd::Zero(rows);
for (int i = 0; i < rows; ++i)
for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
VERIFY_IS_APPROX(y, y_ref);
}
}
// Float with large stride: 9000 * sizeof(float) = 36000 > 32000
{
const int stride = 9000;
for (int ci = 0; ci < 7; ++ci) {
for (int ri = 0; ri < 14; ++ri) {
int rows = test_rows[ri], cols = test_cols[ci];
Matrix<float, Dynamic, Dynamic, RowMajor> A_full(rows, stride);
A_full.setRandom();
auto A = A_full.leftCols(cols);
VectorXf x = VectorXf::Random(cols);
VectorXf y = A * x;
VectorXf y_ref = VectorXf::Zero(rows);
for (int i = 0; i < rows; ++i)
for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
VERIFY_IS_APPROX(y, y_ref);
}
}
}
}
// Test the main row-major GEMV n8=0 path (not run_small_cols) with varied row counts.
// The n8 threshold is stride*sizeof(Scalar) > 32000.
template <int>
void gemv_rowmajor_large_stride_varied_rows() {
const int test_rows[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17, 25, 100};
// Double: cols=5000 (5000*8 > 32000), enough cols to stay on main run() path.
{
const int cols = 5000;
for (int ri = 0; ri < 16; ++ri) {
int rows = test_rows[ri];
Matrix<double, Dynamic, Dynamic, RowMajor> A(rows, cols);
A.setRandom();
VectorXd x = VectorXd::Random(cols);
VectorXd y = A * x;
VectorXd y_ref = VectorXd::Zero(rows);
for (int i = 0; i < rows; ++i)
for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
VERIFY_IS_APPROX(y, y_ref);
}
}
// Float: cols=9000 (9000*4 > 32000).
{
const int cols = 9000;
for (int ri = 0; ri < 16; ++ri) {
int rows = test_rows[ri];
Matrix<float, Dynamic, Dynamic, RowMajor> A(rows, cols);
A.setRandom();
VectorXf x = VectorXf::Random(cols);
VectorXf y = A * x;
VectorXf y_ref = VectorXf::Zero(rows);
for (int i = 0; i < rows; ++i)
for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
VERIFY_IS_APPROX(y, y_ref);
}
}
}
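// Illustrative sketch (hypothetical helper, not part of this change): why the
// leftCols() views used above have a large outer stride. For a row-major matrix,
// outerStride() is the element distance between consecutive rows, i.e. the column
// count of the underlying matrix, not the column count of the view.
template <int>
void sketch_leftcols_outer_stride() {
  Matrix<double, Dynamic, Dynamic, RowMajor> A_full(3, 5000);
  auto A = A_full.leftCols(4);
  // The view has 4 columns, but consecutive rows are still 5000 doubles apart:
  // 5000 * sizeof(double) = 40000 bytes > 32000, so GEMV takes the n8 == 0 path.
  VERIFY(A.cols() == 4);
  VERIFY(A.outerStride() == 5000);
}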
// Test extreme aspect ratios that exercise GEMV, outer-product, and thin-GEMM dispatch.
template <int>
void product_extreme_aspect_ratios() {
const int sizes[] = {1, 2, 3, 4, 8, 16, 48, 64, 128};
for (int si = 0; si < 9; ++si) {
int s = sizes[si];
for (int ki = 0; ki < 9; ++ki) {
int k = sizes[ki];
// Thin result: s x k * k x 2 (2-column GEMM)
{
MatrixXd A = MatrixXd::Random(s, k);
MatrixXd B = MatrixXd::Random(k, 2);
MatrixXd C = A * B;
MatrixXd Cref = MatrixXd::Zero(s, 2);
for (int i = 0; i < s; ++i)
for (int j = 0; j < 2; ++j)
for (int kk = 0; kk < k; ++kk) Cref(i, j) += A(i, kk) * B(kk, j);
VERIFY_IS_APPROX(C, Cref);
}
// Wide result: 2 x k * k x s (2-row GEMM)
{
MatrixXd A = MatrixXd::Random(2, k);
MatrixXd B = MatrixXd::Random(k, s);
MatrixXd C = A * B;
MatrixXd Cref = MatrixXd::Zero(2, s);
for (int i = 0; i < 2; ++i)
for (int j = 0; j < s; ++j)
for (int kk = 0; kk < k; ++kk) Cref(i, j) += A(i, kk) * B(kk, j);
VERIFY_IS_APPROX(C, Cref);
}
// GEMV: s x k * k x 1
{
MatrixXd A = MatrixXd::Random(s, k);
VectorXd x = VectorXd::Random(k);
VectorXd y = A * x;
VectorXd yref = VectorXd::Zero(s);
for (int i = 0; i < s; ++i)
for (int kk = 0; kk < k; ++kk) yref(i) += A(i, kk) * x(kk);
VERIFY_IS_APPROX(y, yref);
}
// Vec-mat: 1 x k * k x s
{
RowVectorXd v = RowVectorXd::Random(k);
MatrixXd B = MatrixXd::Random(k, s);
RowVectorXd r = v * B;
RowVectorXd rref = RowVectorXd::Zero(s);
for (int j = 0; j < s; ++j)
for (int kk = 0; kk < k; ++kk) rref(j) += v(kk) * B(kk, j);
VERIFY_IS_APPROX(r, rref);
}
}
}
}
template <int>
void bug_1622() {
typedef Matrix<double, 2, -1, 0, 2, -1> Mat2X;
@@ -193,11 +346,16 @@ EIGEN_DECLARE_TEST(product_large) {
internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
CALL_SUBTEST_11(product(Matrix<bfloat16, Dynamic, Dynamic, RowMajor>(
internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
CALL_SUBTEST_12(product(Matrix<Eigen::half, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
}
CALL_SUBTEST_6(product_large_regressions<0>());
CALL_SUBTEST_6(bug_gemv_rowmajor_large_stride<0>());
CALL_SUBTEST_6(bug_gemv_run_small_cols<0>());
CALL_SUBTEST_6(gemv_small_cols_systematic<0>());
CALL_SUBTEST_6(gemv_rowmajor_large_stride_varied_rows<0>());
CALL_SUBTEST_6(product_extreme_aspect_ratios<0>());
// Regression test for bug 714:
#if defined EIGEN_HAS_OPENMP

View File: test/product_small.cpp

@@ -275,6 +275,31 @@ void product_small_regressions() {
}
}
// Test products at sizes near critical code-path transitions:
// - EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD = 8 (coeff-based vs GEBP)
// - Blocking early-return at max(k,m,n) < 48
// Uses a sparse set of sizes so the total number of (m, n, k) combinations is 14^3 = 2744, keeping the test fast.
template <typename T>
void product_transition_sizes() {
using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
const int critical[] = {1, 2, 4, 7, 8, 9, 12, 16, 24, 32, 47, 48, 49, 64};
for (int im = 0; im < 14; ++im) {
for (int in = 0; in < 14; ++in) {
Matrix C = Matrix::Zero(critical[im], critical[in]);
Matrix Cref = Matrix::Zero(critical[im], critical[in]);
for (int ik = 0; ik < 14; ++ik) {
int m = critical[im], n = critical[in], k = critical[ik];
Matrix A = Matrix::Random(m, k);
Matrix B = Matrix::Random(k, n);
C = A * B;
Cref.setZero();
ref_prod(Cref, A, B);
VERIFY_IS_APPROX(C, Cref);
}
}
}
}
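// Illustrative sketch (hypothetical helper, not part of this change): ref_prod is a
// reference-product helper defined elsewhere in Eigen's test suite; a plain
// coefficient-wise accumulation of the kind such a helper is expected to perform:
template <typename MatC, typename MatA, typename MatB>
void naive_ref_prod(MatC& C, const MatA& A, const MatB& B) {
  // Assumes C is already sized A.rows() x B.cols() and zero-initialized,
  // matching how Cref is prepared before each ref_prod call above.
  for (Index i = 0; i < C.rows(); ++i)
    for (Index j = 0; j < C.cols(); ++j)
      for (Index k = 0; k < A.cols(); ++k) C(i, j) += A(i, k) * B(k, j);
}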
template <typename T>
void product_sweep(int max_m, int max_k, int max_n) {
using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
@@ -338,4 +363,8 @@ EIGEN_DECLARE_TEST(product_small) {
}
CALL_SUBTEST_6(product_small_regressions<0>());
// Deterministic sweep at transition boundaries (outside g_repeat).
CALL_SUBTEST_54(product_transition_sizes<float>());
CALL_SUBTEST_55(product_transition_sizes<double>());
}

View File: test/product_threaded.cpp

@@ -24,6 +24,54 @@ void test_parallelize_gemm() {
c_threaded.noalias() = a * b;
VERIFY_IS_APPROX(c, c_threaded);
Eigen::setGemmThreadPool(nullptr);
}
EIGEN_DECLARE_TEST(product_threaded) { CALL_SUBTEST(test_parallelize_gemm()); }
void test_parallelize_gemm_varied() {
constexpr int num_threads = 4;
ThreadPool pool(num_threads);
// Non-square float
{
MatrixXf a = MatrixXf::Random(512, 2048);
MatrixXf b = MatrixXf::Random(2048, 256);
MatrixXf c_serial(512, 256);
c_serial.noalias() = a * b;
Eigen::setGemmThreadPool(&pool);
MatrixXf c_threaded(512, 256);
c_threaded.noalias() = a * b;
Eigen::setGemmThreadPool(nullptr);
VERIFY_IS_APPROX(c_serial, c_threaded);
}
// Double
{
MatrixXd a = MatrixXd::Random(512, 512);
MatrixXd b = MatrixXd::Random(512, 512);
MatrixXd c_serial(512, 512);
c_serial.noalias() = a * b;
Eigen::setGemmThreadPool(&pool);
MatrixXd c_threaded(512, 512);
c_threaded.noalias() = a * b;
Eigen::setGemmThreadPool(nullptr);
VERIFY_IS_APPROX(c_serial, c_threaded);
}
// Complex double
{
MatrixXcd a = MatrixXcd::Random(256, 256);
MatrixXcd b = MatrixXcd::Random(256, 256);
MatrixXcd c_serial(256, 256);
c_serial.noalias() = a * b;
Eigen::setGemmThreadPool(&pool);
MatrixXcd c_threaded(256, 256);
c_threaded.noalias() = a * b;
Eigen::setGemmThreadPool(nullptr);
VERIFY_IS_APPROX(c_serial, c_threaded);
}
}
EIGEN_DECLARE_TEST(product_threaded) {
CALL_SUBTEST_1(test_parallelize_gemm());
CALL_SUBTEST_2(test_parallelize_gemm_varied());
}