From 5e478d32858ed092fa0b8a2df6ff16f89be398d6 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com>
Date: Thu, 12 Mar 2026 12:32:06 -0700
Subject: [PATCH] Improve product test coverage at critical code-path
 boundaries

libeigen/eigen!2285

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
---
 test/product_extra.cpp    |  51 ++++++++++++
 test/product_large.cpp    | 158 ++++++++++++++++++++++++++++++++++++++
 test/product_small.cpp    |  29 +++++++
 test/product_threaded.cpp |  50 +++++++++++-
 4 files changed, 287 insertions(+), 1 deletion(-)
diff --git a/test/product_extra.cpp b/test/product_extra.cpp
index cb24d5094..5cad11a12 100644
--- a/test/product_extra.cpp
+++ b/test/product_extra.cpp
@@ -252,6 +252,7 @@ EIGEN_DONT_INLINE Index test_compute_block_size(Index m, Index n, Index k) {
 template <typename T>
 Index compute_block_size() {
   Index ret = 0;
+  // Zero-sized inputs: verify they compile and don't crash.
   ret += test_compute_block_size<T>(0, 1, 1);
   ret += test_compute_block_size<T>(1, 0, 1);
   ret += test_compute_block_size<T>(1, 1, 0);
@@ -259,9 +260,58 @@ Index compute_block_size() {
   ret += test_compute_block_size<T>(0, 1, 0);
   ret += test_compute_block_size<T>(1, 0, 0);
   ret += test_compute_block_size<T>(0, 0, 0);
+
+  // Sanity checks: blocking sizes must be positive and not exceed the original.
+  {
+    Index m = 200, n = 200, k = 200;
+    Index mc = m, nc = n, kc = k;
+    internal::computeProductBlockingSizes<T, T>(kc, mc, nc);
+    VERIFY(kc > 0 && kc <= k);
+    VERIFY(mc > 0 && mc <= m);
+    VERIFY(nc > 0 && nc <= n);
+  }
+  // With EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS (l1=9KB, l2=32KB, l3=512KB),
+  // large sizes must be actually blocked (not returned as-is).
+  {
+    Index m = 500, n = 500, k = 500;
+    Index mc = m, nc = n, kc = k;
+    internal::computeProductBlockingSizes<T, T>(kc, mc, nc);
+    VERIFY(kc < k);
+  }
+
   return ret;
 }
 
+// Verify correctness of GEMM at sizes that require multiple blocking passes
+// under EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS (l1=9KB, l2=32KB, l3=512KB).
+// The blocking early-return threshold is max(k,m,n) < 48, so sizes >= 48
+// trigger actual multi-pass blocking with these tiny cache sizes.
+// Verifies GEMM against column-by-column GEMV (a different code path).
+template <int>
+void test_small_block_correctness() {
+  const int sizes[] = {48, 64, 96, 128, 200};
+  for (int si = 0; si < 5; ++si) {
+    int n = sizes[si];
+    MatrixXd A = MatrixXd::Random(n, n);
+    MatrixXd B = MatrixXd::Random(n, n);
+    MatrixXd C(n, n);
+    C.noalias() = A * B;
+    MatrixXd Cref(n, n);
+    for (int j = 0; j < n; ++j) Cref.col(j) = A * B.col(j);
+    VERIFY_IS_APPROX(C, Cref);
+  }
+  // Non-square: exercise different blocking in m, n, k dimensions.
+  {
+    MatrixXd A = MatrixXd::Random(200, 64);
+    MatrixXd B = MatrixXd::Random(64, 300);
+    MatrixXd C(200, 300);
+    C.noalias() = A * B;
+    MatrixXd Cref(200, 300);
+    for (int j = 0; j < 300; ++j) Cref.col(j) = A * B.col(j);
+    VERIFY_IS_APPROX(C, Cref);
+  }
+}
+
 template <typename>
 void aliasing_with_resize() {
   Index m = internal::random<Index>(10, 50);
@@ -542,4 +592,5 @@ EIGEN_DECLARE_TEST(product_extra) {
   CALL_SUBTEST_7(compute_block_size<std::complex<double> >());
   CALL_SUBTEST_8(aliasing_with_resize<void>());
   CALL_SUBTEST_9(product_custom_scalar_types<0>());
+  CALL_SUBTEST_10(test_small_block_correctness<0>());
 }
diff --git a/test/product_large.cpp b/test/product_large.cpp
index 79d77abd2..2fdc261ec 100644
--- a/test/product_large.cpp
+++ b/test/product_large.cpp
@@ -152,6 +152,159 @@ void bug_gemv_run_small_cols() {
   VERIFY_IS_APPROX(y, y_ref);
 }
 
+// Systematic test of row-major GEMV run_small_cols and main run() remainder paths.
+// Varies cols from 1-7 (covers float PacketSize=8 and double PacketSize=4 boundaries)
+// and rows across values that exercise all n8/n4/n2/n1 remainder combinations.
+template <int>
+void gemv_small_cols_systematic() {
+  const int test_cols[] = {1, 2, 3, 4, 5, 6, 7};
+  const int test_rows[] = {1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 15, 16, 17, 25};
+
+  // Large stride forces n8=0, exercising all remainder-only paths.
+  {
+    const int stride = 5000;  // 5000 * sizeof(double) = 40000 > 32000
+    for (int ci = 0; ci < 7; ++ci) {
+      for (int ri = 0; ri < 14; ++ri) {
+        int rows = test_rows[ri], cols = test_cols[ci];
+        Matrix<double, Dynamic, Dynamic, RowMajor> A_full(rows, stride);
+        A_full.setRandom();
+        auto A = A_full.leftCols(cols);
+        VectorXd x = VectorXd::Random(cols);
+        VectorXd y = A * x;
+        VectorXd y_ref = VectorXd::Zero(rows);
+        for (int i = 0; i < rows; ++i)
+          for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
+        VERIFY_IS_APPROX(y, y_ref);
+      }
+    }
+  }
+
+  // Normal stride (n8 active) to cover the 8-row main loop + remainders.
+  for (int ci = 0; ci < 7; ++ci) {
+    for (int ri = 0; ri < 14; ++ri) {
+      int rows = test_rows[ri], cols = test_cols[ci];
+      Matrix<double, Dynamic, Dynamic, RowMajor> A(rows, cols);
+      A.setRandom();
+      VectorXd x = VectorXd::Random(cols);
+      VectorXd y = A * x;
+      VectorXd y_ref = VectorXd::Zero(rows);
+      for (int i = 0; i < rows; ++i)
+        for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
+      VERIFY_IS_APPROX(y, y_ref);
+    }
+  }
+
+  // Float with large stride: 9000 * sizeof(float) = 36000 > 32000
+  {
+    const int stride = 9000;
+    for (int ci = 0; ci < 7; ++ci) {
+      for (int ri = 0; ri < 14; ++ri) {
+        int rows = test_rows[ri], cols = test_cols[ci];
+        Matrix<float, Dynamic, Dynamic, RowMajor> A_full(rows, stride);
+        A_full.setRandom();
+        auto A = A_full.leftCols(cols);
+        VectorXf x = VectorXf::Random(cols);
+        VectorXf y = A * x;
+        VectorXf y_ref = VectorXf::Zero(rows);
+        for (int i = 0; i < rows; ++i)
+          for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
+        VERIFY_IS_APPROX(y, y_ref);
+      }
+    }
+  }
+}
+
+// Test the main row-major GEMV n8=0 path (not run_small_cols) with varied row counts.
+// The n8 threshold is stride*sizeof(Scalar) > 32000.
+template <int>
+void gemv_rowmajor_large_stride_varied_rows() {
+  const int test_rows[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17, 25, 100};
+  // Double: cols=5000 (5000*8 > 32000), enough cols to stay on main run() path.
+  {
+    const int cols = 5000;
+    for (int ri = 0; ri < 16; ++ri) {
+      int rows = test_rows[ri];
+      Matrix<double, Dynamic, Dynamic, RowMajor> A(rows, cols);
+      A.setRandom();
+      VectorXd x = VectorXd::Random(cols);
+      VectorXd y = A * x;
+      VectorXd y_ref = VectorXd::Zero(rows);
+      for (int i = 0; i < rows; ++i)
+        for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
+      VERIFY_IS_APPROX(y, y_ref);
+    }
+  }
+  // Float: cols=9000 (9000*4 > 32000).
+  {
+    const int cols = 9000;
+    for (int ri = 0; ri < 16; ++ri) {
+      int rows = test_rows[ri];
+      Matrix<float, Dynamic, Dynamic, RowMajor> A(rows, cols);
+      A.setRandom();
+      VectorXf x = VectorXf::Random(cols);
+      VectorXf y = A * x;
+      VectorXf y_ref = VectorXf::Zero(rows);
+      for (int i = 0; i < rows; ++i)
+        for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
+      VERIFY_IS_APPROX(y, y_ref);
+    }
+  }
+}
+
+// Test extreme aspect ratios that exercise GEMV, outer-product, and thin-GEMM dispatch.
+template <int>
+void product_extreme_aspect_ratios() {
+  const int sizes[] = {1, 2, 3, 4, 8, 16, 48, 64, 128};
+  for (int si = 0; si < 9; ++si) {
+    int s = sizes[si];
+    for (int ki = 0; ki < 9; ++ki) {
+      int k = sizes[ki];
+      // Thin result: s x k * k x 2  (2-column GEMM)
+      {
+        MatrixXd A = MatrixXd::Random(s, k);
+        MatrixXd B = MatrixXd::Random(k, 2);
+        MatrixXd C = A * B;
+        MatrixXd Cref = MatrixXd::Zero(s, 2);
+        for (int i = 0; i < s; ++i)
+          for (int j = 0; j < 2; ++j)
+            for (int kk = 0; kk < k; ++kk) Cref(i, j) += A(i, kk) * B(kk, j);
+        VERIFY_IS_APPROX(C, Cref);
+      }
+      // Wide result: 2 x k * k x s  (2-row GEMM)
+      {
+        MatrixXd A = MatrixXd::Random(2, k);
+        MatrixXd B = MatrixXd::Random(k, s);
+        MatrixXd C = A * B;
+        MatrixXd Cref = MatrixXd::Zero(2, s);
+        for (int i = 0; i < 2; ++i)
+          for (int j = 0; j < s; ++j)
+            for (int kk = 0; kk < k; ++kk) Cref(i, j) += A(i, kk) * B(kk, j);
+        VERIFY_IS_APPROX(C, Cref);
+      }
+      // GEMV: s x k * k x 1
+      {
+        MatrixXd A = MatrixXd::Random(s, k);
+        VectorXd x = VectorXd::Random(k);
+        VectorXd y = A * x;
+        VectorXd yref = VectorXd::Zero(s);
+        for (int i = 0; i < s; ++i)
+          for (int kk = 0; kk < k; ++kk) yref(i) += A(i, kk) * x(kk);
+        VERIFY_IS_APPROX(y, yref);
+      }
+      // Vec-mat: 1 x k * k x s
+      {
+        RowVectorXd v = RowVectorXd::Random(k);
+        MatrixXd B = MatrixXd::Random(k, s);
+        RowVectorXd r = v * B;
+        RowVectorXd rref = RowVectorXd::Zero(s);
+        for (int j = 0; j < s; ++j)
+          for (int kk = 0; kk < k; ++kk) rref(j) += v(kk) * B(kk, j);
+        VERIFY_IS_APPROX(r, rref);
+      }
+    }
+  }
+}
+
 template <int>
 void bug_1622() {
   typedef Matrix<double, 2, -1, 0, 2, -1> Mat2X;
@@ -193,11 +346,16 @@ EIGEN_DECLARE_TEST(product_large) {
         internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
     CALL_SUBTEST_11(product(Matrix<bfloat16, Dynamic, Dynamic, RowMajor>(
         internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
+    CALL_SUBTEST_12(product(Matrix<Eigen::half, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
+                                                                  internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
   }
 
   CALL_SUBTEST_6(product_large_regressions<0>());
   CALL_SUBTEST_6(bug_gemv_rowmajor_large_stride<0>());
   CALL_SUBTEST_6(bug_gemv_run_small_cols<0>());
+  CALL_SUBTEST_6(gemv_small_cols_systematic<0>());
+  CALL_SUBTEST_6(gemv_rowmajor_large_stride_varied_rows<0>());
+  CALL_SUBTEST_6(product_extreme_aspect_ratios<0>());
 
   // Regression test for bug 714:
 #if defined EIGEN_HAS_OPENMP
diff --git a/test/product_small.cpp b/test/product_small.cpp
index 1b1eef1e6..1ad06f61c 100644
--- a/test/product_small.cpp
+++ b/test/product_small.cpp
@@ -275,6 +275,31 @@ void product_small_regressions() {
   }
 }
 
+// Test products at sizes near critical code-path transitions:
+//  - EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD = 8 (coeff-based vs GEBP)
+//  - Blocking early-return at max(k,m,n) < 48
+// Uses a sparse set of sizes so total count is 14^3 = 2744 (fast).
+template <typename T>
+void product_transition_sizes() {
+  using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
+  const int critical[] = {1, 2, 4, 7, 8, 9, 12, 16, 24, 32, 47, 48, 49, 64};
+  for (int im = 0; im < 14; ++im) {
+    for (int in = 0; in < 14; ++in) {
+      Matrix C = Matrix::Zero(critical[im], critical[in]);
+      Matrix Cref = Matrix::Zero(critical[im], critical[in]);
+      for (int ik = 0; ik < 14; ++ik) {
+        int m = critical[im], n = critical[in], k = critical[ik];
+        Matrix A = Matrix::Random(m, k);
+        Matrix B = Matrix::Random(k, n);
+        C = A * B;
+        Cref.setZero();
+        ref_prod(Cref, A, B);
+        VERIFY_IS_APPROX(C, Cref);
+      }
+    }
+  }
+}
+
 template <typename T>
 void product_sweep(int max_m, int max_k, int max_n) {
   using Matrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
@@ -338,4 +363,8 @@ EIGEN_DECLARE_TEST(product_small) {
   }
 
   CALL_SUBTEST_6(product_small_regressions<0>());
+
+  // Deterministic sweep at transition boundaries (outside g_repeat).
+  CALL_SUBTEST_54(product_transition_sizes<float>());
+  CALL_SUBTEST_55(product_transition_sizes<double>());
 }
diff --git a/test/product_threaded.cpp b/test/product_threaded.cpp
index 90d5d99c4..6bd9397bb 100644
--- a/test/product_threaded.cpp
+++ b/test/product_threaded.cpp
@@ -24,6 +24,54 @@ void test_parallelize_gemm() {
   c_threaded.noalias() = a * b;
 
   VERIFY_IS_APPROX(c, c_threaded);
+  Eigen::setGemmThreadPool(nullptr);
 }
 
-EIGEN_DECLARE_TEST(product_threaded) { CALL_SUBTEST(test_parallelize_gemm()); }
+void test_parallelize_gemm_varied() {
+  constexpr int num_threads = 4;
+  ThreadPool pool(num_threads);
+
+  // Non-square float
+  {
+    MatrixXf a = MatrixXf::Random(512, 2048);
+    MatrixXf b = MatrixXf::Random(2048, 256);
+    MatrixXf c_serial(512, 256);
+    c_serial.noalias() = a * b;
+    Eigen::setGemmThreadPool(&pool);
+    MatrixXf c_threaded(512, 256);
+    c_threaded.noalias() = a * b;
+    Eigen::setGemmThreadPool(nullptr);
+    VERIFY_IS_APPROX(c_serial, c_threaded);
+  }
+
+  // Double
+  {
+    MatrixXd a = MatrixXd::Random(512, 512);
+    MatrixXd b = MatrixXd::Random(512, 512);
+    MatrixXd c_serial(512, 512);
+    c_serial.noalias() = a * b;
+    Eigen::setGemmThreadPool(&pool);
+    MatrixXd c_threaded(512, 512);
+    c_threaded.noalias() = a * b;
+    Eigen::setGemmThreadPool(nullptr);
+    VERIFY_IS_APPROX(c_serial, c_threaded);
+  }
+
+  // Complex double
+  {
+    MatrixXcd a = MatrixXcd::Random(256, 256);
+    MatrixXcd b = MatrixXcd::Random(256, 256);
+    MatrixXcd c_serial(256, 256);
+    c_serial.noalias() = a * b;
+    Eigen::setGemmThreadPool(&pool);
+    MatrixXcd c_threaded(256, 256);
+    c_threaded.noalias() = a * b;
+    Eigen::setGemmThreadPool(nullptr);
+    VERIFY_IS_APPROX(c_serial, c_threaded);
+  }
+}
+
+EIGEN_DECLARE_TEST(product_threaded) {
+  CALL_SUBTEST_1(test_parallelize_gemm());
+  CALL_SUBTEST_2(test_parallelize_gemm_varied());
+}