Fix row-major GEMV dropping rows when n8 heuristic disables main loop

libeigen/eigen!2233

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
Rasmus Munk Larsen
2026-03-01 23:47:35 -08:00
parent 662d5c21ff
commit 57b1de2330
2 changed files with 40 additions and 5 deletions

View File

@@ -423,7 +423,7 @@ general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjugateLh
res[(i + 6) * resIncr] += alpha * cc6;
res[(i + 7) * resIncr] += alpha * cc7;
}
if (i < n4) {
for (; i < n4; i += 4) {
ResPacket c0 = pzero(ResPacket{}), c1 = pzero(ResPacket{}), c2 = pzero(ResPacket{}), c3 = pzero(ResPacket{});
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) {
@@ -451,9 +451,8 @@ general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjugateLh
res[(i + 1) * resIncr] += alpha * cc1;
res[(i + 2) * resIncr] += alpha * cc2;
res[(i + 3) * resIncr] += alpha * cc3;
i += 4;
}
if (i < n2) {
for (; i < n2; i += 2) {
ResPacket c0 = pzero(ResPacket{}), c1 = pzero(ResPacket{});
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) {
@@ -473,9 +472,8 @@ general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjugateLh
}
res[(i + 0) * resIncr] += alpha * cc0;
res[(i + 1) * resIncr] += alpha * cc1;
i += 2;
}
if (i < rows) {
for (; i < rows; ++i) {
ResPacket c0 = pzero(ResPacket{});
ResPacketHalf c0_h = pzero(ResPacketHalf{});
ResPacketQuarter c0_q = pzero(ResPacketQuarter{});

View File

@@ -94,6 +94,42 @@ void product_large_regressions() {
}
}
// Regression test: row-major GEMV with stride*sizeof > 32000 disables the
// 8-row main loop (n8=0). The cleanup must use `for` loops (not `if`) to
// process all remaining rows. Without the fix, only 7 out of `rows` results
// are computed. This manifests as loss of orthogonality in QR of tall-skinny
// matrices, since the Householder application uses row-major GEMV internally.
template <int>
void bug_gemv_rowmajor_large_stride() {
// Direct GEMV test: row-major A with stride (= cols) triggering n8=0.
// The threshold is stride * sizeof(Scalar) > 32000.
// For double: cols > 4000. For float: cols > 8000.
{
const int rows = 100;
const int cols = 5000; // cols * sizeof(double) = 40000 > 32000
Matrix<double, Dynamic, Dynamic, RowMajor> A(rows, cols);
A.setRandom();
VectorXd x = VectorXd::Random(cols);
VectorXd y = A * x;
VectorXd y_ref = VectorXd::Zero(rows);
for (int i = 0; i < rows; ++i)
for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
VERIFY_IS_APPROX(y, y_ref);
}
// QR orthogonality test: this is the high-level symptom.
// HouseholderQR of a col-major (m x n) matrix with m > 4000
// uses row-major GEMV internally during Householder application.
{
const int m = 5000;
const int n = 50;
MatrixXd A = MatrixXd::Random(m, n);
MatrixXd Q = A.householderQr().householderQ() * MatrixXd::Identity(m, n);
MatrixXd QtQ = Q.adjoint() * Q;
VERIFY_IS_APPROX(QtQ, MatrixXd::Identity(n, n));
}
}
template <int>
void bug_1622() {
typedef Matrix<double, 2, -1, 0, 2, -1> Mat2X;
@@ -138,6 +174,7 @@ EIGEN_DECLARE_TEST(product_large) {
}
CALL_SUBTEST_6(product_large_regressions<0>());
CALL_SUBTEST_6(bug_gemv_rowmajor_large_stride<0>());
// Regression test for bug 714:
#if defined EIGEN_HAS_OPENMP