mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Fix row-major GEMV dropping rows when n8 heuristic disables main loop
libeigen/eigen!2233 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -423,7 +423,7 @@ general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjugateLh
|
||||
res[(i + 6) * resIncr] += alpha * cc6;
|
||||
res[(i + 7) * resIncr] += alpha * cc7;
|
||||
}
|
||||
if (i < n4) {
|
||||
for (; i < n4; i += 4) {
|
||||
ResPacket c0 = pzero(ResPacket{}), c1 = pzero(ResPacket{}), c2 = pzero(ResPacket{}), c3 = pzero(ResPacket{});
|
||||
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) {
|
||||
@@ -451,9 +451,8 @@ general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjugateLh
|
||||
res[(i + 1) * resIncr] += alpha * cc1;
|
||||
res[(i + 2) * resIncr] += alpha * cc2;
|
||||
res[(i + 3) * resIncr] += alpha * cc3;
|
||||
i += 4;
|
||||
}
|
||||
if (i < n2) {
|
||||
for (; i < n2; i += 2) {
|
||||
ResPacket c0 = pzero(ResPacket{}), c1 = pzero(ResPacket{});
|
||||
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize) {
|
||||
@@ -473,9 +472,8 @@ general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, ConjugateLh
|
||||
}
|
||||
res[(i + 0) * resIncr] += alpha * cc0;
|
||||
res[(i + 1) * resIncr] += alpha * cc1;
|
||||
i += 2;
|
||||
}
|
||||
if (i < rows) {
|
||||
for (; i < rows; ++i) {
|
||||
ResPacket c0 = pzero(ResPacket{});
|
||||
ResPacketHalf c0_h = pzero(ResPacketHalf{});
|
||||
ResPacketQuarter c0_q = pzero(ResPacketQuarter{});
|
||||
|
||||
@@ -94,6 +94,42 @@ void product_large_regressions() {
|
||||
}
|
||||
}
|
||||
|
||||
// Regression test: row-major GEMV with stride*sizeof > 32000 disables the
|
||||
// 8-row main loop (n8=0). The cleanup must use `for` loops (not `if`) to
|
||||
// process all remaining rows. Without the fix, only 7 out of `rows` results
|
||||
// are computed. This manifests as loss of orthogonality in QR of tall-skinny
|
||||
// matrices, since the Householder application uses row-major GEMV internally.
|
||||
template <int>
|
||||
void bug_gemv_rowmajor_large_stride() {
|
||||
// Direct GEMV test: row-major A with stride (= cols) triggering n8=0.
|
||||
// The threshold is stride * sizeof(Scalar) > 32000.
|
||||
// For double: cols > 4000. For float: cols > 8000.
|
||||
{
|
||||
const int rows = 100;
|
||||
const int cols = 5000; // cols * sizeof(double) = 40000 > 32000
|
||||
Matrix<double, Dynamic, Dynamic, RowMajor> A(rows, cols);
|
||||
A.setRandom();
|
||||
VectorXd x = VectorXd::Random(cols);
|
||||
VectorXd y = A * x;
|
||||
VectorXd y_ref = VectorXd::Zero(rows);
|
||||
for (int i = 0; i < rows; ++i)
|
||||
for (int j = 0; j < cols; ++j) y_ref(i) += A(i, j) * x(j);
|
||||
VERIFY_IS_APPROX(y, y_ref);
|
||||
}
|
||||
|
||||
// QR orthogonality test: this is the high-level symptom.
|
||||
// HouseholderQR of a col-major (m x n) matrix with m > 4000
|
||||
// uses row-major GEMV internally during Householder application.
|
||||
{
|
||||
const int m = 5000;
|
||||
const int n = 50;
|
||||
MatrixXd A = MatrixXd::Random(m, n);
|
||||
MatrixXd Q = A.householderQr().householderQ() * MatrixXd::Identity(m, n);
|
||||
MatrixXd QtQ = Q.adjoint() * Q;
|
||||
VERIFY_IS_APPROX(QtQ, MatrixXd::Identity(n, n));
|
||||
}
|
||||
}
|
||||
|
||||
template <int>
|
||||
void bug_1622() {
|
||||
typedef Matrix<double, 2, -1, 0, 2, -1> Mat2X;
|
||||
@@ -138,6 +174,7 @@ EIGEN_DECLARE_TEST(product_large) {
|
||||
}
|
||||
|
||||
CALL_SUBTEST_6(product_large_regressions<0>());
|
||||
CALL_SUBTEST_6(bug_gemv_rowmajor_large_stride<0>());
|
||||
|
||||
// Regression test for bug 714:
|
||||
#if defined EIGEN_HAS_OPENMP
|
||||
|
||||
Reference in New Issue
Block a user