From 5e478d32858ed092fa0b8a2df6ff16f89be398d6 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com> Date: Thu, 12 Mar 2026 12:32:06 -0700 Subject: [PATCH] Improve product test coverage at critical code-path boundaries libeigen/eigen!2285 Co-authored-by: Rasmus Munk Larsen --- test/product_extra.cpp | 51 ++++++++++++ test/product_large.cpp | 158 ++++++++++++++++++++++++++++++++++++++ test/product_small.cpp | 29 +++++++ test/product_threaded.cpp | 50 +++++++++++- 4 files changed, 287 insertions(+), 1 deletion(-) diff --git a/test/product_extra.cpp b/test/product_extra.cpp index cb24d5094..5cad11a12 100644 --- a/test/product_extra.cpp +++ b/test/product_extra.cpp @@ -252,6 +252,7 @@ EIGEN_DONT_INLINE Index test_compute_block_size(Index m, Index n, Index k) { template Index compute_block_size() { Index ret = 0; + // Zero-sized inputs: verify they compile and don't crash. ret += test_compute_block_size(0, 1, 1); ret += test_compute_block_size(1, 0, 1); ret += test_compute_block_size(1, 1, 0); @@ -259,9 +260,58 @@ Index compute_block_size() { ret += test_compute_block_size(0, 1, 0); ret += test_compute_block_size(1, 0, 0); ret += test_compute_block_size(0, 0, 0); + + // Sanity checks: blocking sizes must be positive and not exceed the original. + { + Index m = 200, n = 200, k = 200; + Index mc = m, nc = n, kc = k; + internal::computeProductBlockingSizes(kc, mc, nc); + VERIFY(kc > 0 && kc <= k); + VERIFY(mc > 0 && mc <= m); + VERIFY(nc > 0 && nc <= n); + } + // With EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS (l1=9KB, l2=32KB, l3=512KB), + // large sizes must be actually blocked (not returned as-is). + { + Index m = 500, n = 500, k = 500; + Index mc = m, nc = n, kc = k; + internal::computeProductBlockingSizes(kc, mc, nc); + VERIFY(kc < k); + } + return ret; } +// Verify correctness of GEMM at sizes that require multiple blocking passes +// under EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS (l1=9KB, l2=32KB, l3=512KB). +// The blocking early-return threshold is max(k,m,n) < 48, so sizes >= 48 +// trigger actual multi-pass blocking with these tiny cache sizes. +// Verifies GEMM against column-by-column GEMV (a different code path). +template +void test_small_block_correctness() { + const int sizes[] = {48, 64, 96, 128, 200}; + for (int si = 0; si < 5; ++si) { + int n = sizes[si]; + MatrixXd A = MatrixXd::Random(n, n); + MatrixXd B = MatrixXd::Random(n, n); + MatrixXd C(n, n); + C.noalias() = A * B; + MatrixXd Cref(n, n); + for (int j = 0; j < n; ++j) Cref.col(j) = A * B.col(j); + VERIFY_IS_APPROX(C, Cref); + } + // Non-square: exercise different blocking in m, n, k dimensions. + { + MatrixXd A = MatrixXd::Random(200, 64); + MatrixXd B = MatrixXd::Random(64, 300); + MatrixXd C(200, 300); + C.noalias() = A * B; + MatrixXd Cref(200, 300); + for (int j = 0; j < 300; ++j) Cref.col(j) = A * B.col(j); + VERIFY_IS_APPROX(C, Cref); + } +} + template void aliasing_with_resize() { Index m = internal::random(10, 50); @@ -542,4 +592,5 @@ EIGEN_DECLARE_TEST(product_extra) { CALL_SUBTEST_7(compute_block_size >()); CALL_SUBTEST_8(aliasing_with_resize()); CALL_SUBTEST_9(product_custom_scalar_types<0>()); + CALL_SUBTEST_10(test_small_block_correctness<0>()); } diff --git a/test/product_large.cpp b/test/product_large.cpp index 79d77abd2..2fdc261ec 100644 --- a/test/product_large.cpp +++ b/test/product_large.cpp @@ -152,6 +152,159 @@ void bug_gemv_run_small_cols() { VERIFY_IS_APPROX(y, y_ref); } +// Systematic test of row-major GEMV run_small_cols and main run() remainder paths. +// Varies cols from 1-7 (covers float PacketSize=8 and double PacketSize=4 boundaries) +// and rows across values that exercise all n8/n4/n2/n1 remainder combinations. +template +void gemv_small_cols_systematic() { + const int test_cols[] = {1, 2, 3, 4, 5, 6, 7}; + const int test_rows[] = {1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 15, 16, 17, 25}; + + // Large stride forces n8=0, exercising all remainder-only paths. + { + const int stride = 5000; // 5000 * sizeof(double) = 40000 > 32000 + for (int ci = 0; ci < 7; ++ci) { + for (int ri = 0; ri < 14; ++ri) { + int rows = test_rows[ri], cols = test_cols[ci]; + Matrix A_full(rows, stride); + A_full.setRandom(); + auto A = A_full.leftCols(cols); + VectorXd x = VectorXd::Random(cols); + VectorXd y = A * x; + VectorXd y_ref = VectorXd::Zero(rows); + for (int i = 0; i < rows; ++i) + for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j); + VERIFY_IS_APPROX(y, y_ref); + } + } + } + + // Normal stride (n8 active) to cover the 8-row main loop + remainders. + for (int ci = 0; ci < 7; ++ci) { + for (int ri = 0; ri < 14; ++ri) { + int rows = test_rows[ri], cols = test_cols[ci]; + Matrix A(rows, cols); + A.setRandom(); + VectorXd x = VectorXd::Random(cols); + VectorXd y = A * x; + VectorXd y_ref = VectorXd::Zero(rows); + for (int i = 0; i < rows; ++i) + for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j); + VERIFY_IS_APPROX(y, y_ref); + } + } + + // Float with large stride: 9000 * sizeof(float) = 36000 > 32000 + { + const int stride = 9000; + for (int ci = 0; ci < 7; ++ci) { + for (int ri = 0; ri < 14; ++ri) { + int rows = test_rows[ri], cols = test_cols[ci]; + Matrix A_full(rows, stride); + A_full.setRandom(); + auto A = A_full.leftCols(cols); + VectorXf x = VectorXf::Random(cols); + VectorXf y = A * x; + VectorXf y_ref = VectorXf::Zero(rows); + for (int i = 0; i < rows; ++i) + for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j); + VERIFY_IS_APPROX(y, y_ref); + } + } + } +} + +// Test the main row-major GEMV n8=0 path (not run_small_cols) with varied row counts. +// The n8 threshold is stride*sizeof(Scalar) > 32000. +template +void gemv_rowmajor_large_stride_varied_rows() { + const int test_rows[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17, 25, 100}; + // Double: cols=5000 (5000*8 > 32000), enough cols to stay on main run() path. + { + const int cols = 5000; + for (int ri = 0; ri < 16; ++ri) { + int rows = test_rows[ri]; + Matrix A(rows, cols); + A.setRandom(); + VectorXd x = VectorXd::Random(cols); + VectorXd y = A * x; + VectorXd y_ref = VectorXd::Zero(rows); + for (int i = 0; i < rows; ++i) + for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j); + VERIFY_IS_APPROX(y, y_ref); + } + } + // Float: cols=9000 (9000*4 > 32000). + { + const int cols = 9000; + for (int ri = 0; ri < 16; ++ri) { + int rows = test_rows[ri]; + Matrix A(rows, cols); + A.setRandom(); + VectorXf x = VectorXf::Random(cols); + VectorXf y = A * x; + VectorXf y_ref = VectorXf::Zero(rows); + for (int i = 0; i < rows; ++i) + for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j); + VERIFY_IS_APPROX(y, y_ref); + } + } +} + +// Test extreme aspect ratios that exercise GEMV, outer-product, and thin-GEMM dispatch. +template +void product_extreme_aspect_ratios() { + const int sizes[] = {1, 2, 3, 4, 8, 16, 48, 64, 128}; + for (int si = 0; si < 9; ++si) { + int s = sizes[si]; + for (int ki = 0; ki < 9; ++ki) { + int k = sizes[ki]; + // Thin result: s x k * k x 2 (2-column GEMM) + { + MatrixXd A = MatrixXd::Random(s, k); + MatrixXd B = MatrixXd::Random(k, 2); + MatrixXd C = A * B; + MatrixXd Cref = MatrixXd::Zero(s, 2); + for (int i = 0; i < s; ++i) + for (int j = 0; j < 2; ++j) + for (int kk = 0; kk < k; ++kk) Cref(i, j) += A(i, kk) * B(kk, j); + VERIFY_IS_APPROX(C, Cref); + } + // Wide result: 2 x k * k x s (2-row GEMM) + { + MatrixXd A = MatrixXd::Random(2, k); + MatrixXd B = MatrixXd::Random(k, s); + MatrixXd C = A * B; + MatrixXd Cref = MatrixXd::Zero(2, s); + for (int i = 0; i < 2; ++i) + for (int j = 0; j < s; ++j) + for (int kk = 0; kk < k; ++kk) Cref(i, j) += A(i, kk) * B(kk, j); + VERIFY_IS_APPROX(C, Cref); + } + // GEMV: s x k * k x 1 + { + MatrixXd A = MatrixXd::Random(s, k); + VectorXd x = VectorXd::Random(k); + VectorXd y = A * x; + VectorXd yref = VectorXd::Zero(s); + for (int i = 0; i < s; ++i) + for (int kk = 0; kk < k; ++kk) yref(i) += A(i, kk) * x(kk); + VERIFY_IS_APPROX(y, yref); + } + // Vec-mat: 1 x k * k x s + { + RowVectorXd v = RowVectorXd::Random(k); + MatrixXd B = MatrixXd::Random(k, s); + RowVectorXd r = v * B; + RowVectorXd rref = RowVectorXd::Zero(s); + for (int j = 0; j < s; ++j) + for (int kk = 0; kk < k; ++kk) rref(j) += v(kk) * B(kk, j); + VERIFY_IS_APPROX(r, rref); + } + } + } +} + template void bug_1622() { typedef Matrix Mat2X; @@ -193,11 +346,16 @@ EIGEN_DECLARE_TEST(product_large) { internal::random(1, EIGEN_TEST_MAX_SIZE), internal::random(1, EIGEN_TEST_MAX_SIZE)))); CALL_SUBTEST_11(product(Matrix( internal::random(1, EIGEN_TEST_MAX_SIZE), internal::random(1, EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_12(product(Matrix(internal::random(1, EIGEN_TEST_MAX_SIZE), + internal::random(1, EIGEN_TEST_MAX_SIZE)))); } CALL_SUBTEST_6(product_large_regressions<0>()); CALL_SUBTEST_6(bug_gemv_rowmajor_large_stride<0>()); CALL_SUBTEST_6(bug_gemv_run_small_cols<0>()); + CALL_SUBTEST_6(gemv_small_cols_systematic<0>()); + CALL_SUBTEST_6(gemv_rowmajor_large_stride_varied_rows<0>()); + CALL_SUBTEST_6(product_extreme_aspect_ratios<0>()); // Regression test for bug 714: #if defined EIGEN_HAS_OPENMP diff --git a/test/product_small.cpp b/test/product_small.cpp index 1b1eef1e6..1ad06f61c 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -275,6 +275,31 @@ void product_small_regressions() { } } +// Test products at sizes near critical code-path transitions: +// - EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD = 8 (coeff-based vs GEBP) +// - Blocking early-return at max(k,m,n) < 48 +// Uses a sparse set of sizes so total count is 14^3 = 2744 (fast). +template +void product_transition_sizes() { + using Matrix = Eigen::Matrix; + const int critical[] = {1, 2, 4, 7, 8, 9, 12, 16, 24, 32, 47, 48, 49, 64}; + for (int im = 0; im < 14; ++im) { + for (int in = 0; in < 14; ++in) { + Matrix C = Matrix::Zero(critical[im], critical[in]); + Matrix Cref = Matrix::Zero(critical[im], critical[in]); + for (int ik = 0; ik < 14; ++ik) { + int m = critical[im], n = critical[in], k = critical[ik]; + Matrix A = Matrix::Random(m, k); + Matrix B = Matrix::Random(k, n); + C = A * B; + Cref.setZero(); + ref_prod(Cref, A, B); + VERIFY_IS_APPROX(C, Cref); + } + } + } +} + template void product_sweep(int max_m, int max_k, int max_n) { using Matrix = Eigen::Matrix; @@ -338,4 +363,8 @@ EIGEN_DECLARE_TEST(product_small) { } CALL_SUBTEST_6(product_small_regressions<0>()); + + // Deterministic sweep at transition boundaries (outside g_repeat). + CALL_SUBTEST_54(product_transition_sizes()); + CALL_SUBTEST_55(product_transition_sizes()); } diff --git a/test/product_threaded.cpp b/test/product_threaded.cpp index 90d5d99c4..6bd9397bb 100644 --- a/test/product_threaded.cpp +++ b/test/product_threaded.cpp @@ -24,6 +24,54 @@ void test_parallelize_gemm() { c_threaded.noalias() = a * b; VERIFY_IS_APPROX(c, c_threaded); + Eigen::setGemmThreadPool(nullptr); } -EIGEN_DECLARE_TEST(product_threaded) { CALL_SUBTEST(test_parallelize_gemm()); } +void test_parallelize_gemm_varied() { + constexpr int num_threads = 4; + ThreadPool pool(num_threads); + + // Non-square float + { + MatrixXf a = MatrixXf::Random(512, 2048); + MatrixXf b = MatrixXf::Random(2048, 256); + MatrixXf c_serial(512, 256); + c_serial.noalias() = a * b; + Eigen::setGemmThreadPool(&pool); + MatrixXf c_threaded(512, 256); + c_threaded.noalias() = a * b; + Eigen::setGemmThreadPool(nullptr); + VERIFY_IS_APPROX(c_serial, c_threaded); + } + + // Double + { + MatrixXd a = MatrixXd::Random(512, 512); + MatrixXd b = MatrixXd::Random(512, 512); + MatrixXd c_serial(512, 512); + c_serial.noalias() = a * b; + Eigen::setGemmThreadPool(&pool); + MatrixXd c_threaded(512, 512); + c_threaded.noalias() = a * b; + Eigen::setGemmThreadPool(nullptr); + VERIFY_IS_APPROX(c_serial, c_threaded); + } + + // Complex double + { + MatrixXcd a = MatrixXcd::Random(256, 256); + MatrixXcd b = MatrixXcd::Random(256, 256); + MatrixXcd c_serial(256, 256); + c_serial.noalias() = a * b; + Eigen::setGemmThreadPool(&pool); + MatrixXcd c_threaded(256, 256); + c_threaded.noalias() = a * b; + Eigen::setGemmThreadPool(nullptr); + VERIFY_IS_APPROX(c_serial, c_threaded); + } +} + +EIGEN_DECLARE_TEST(product_threaded) { + CALL_SUBTEST_1(test_parallelize_gemm()); + CALL_SUBTEST_2(test_parallelize_gemm_varied()); +}