From 25dba492e30e19dce3d4bd4cd38af2f5c88c387c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Sat, 28 Feb 2026 05:04:41 +0000 Subject: [PATCH] Use stack-constructed variable for SVD block sweep. libeigen/eigen!2225 --- Eigen/src/SVD/JacobiSVD.h | 88 +++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 237ba3899..fd0275c0f 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -425,7 +425,7 @@ struct svd_precondition_2x2_block_to_be_real { }; template -struct traits > : svd_traits { +struct traits> : svd_traits { typedef MatrixType_ MatrixType; }; @@ -497,7 +497,7 @@ struct traits > : svd_traits -class JacobiSVD : public SVDBase > { +class JacobiSVD : public SVDBase> { typedef SVDBase Base; public: @@ -695,11 +695,13 @@ class JacobiSVD : public SVDBase > { // Blocking parameters for the Jacobi SVD sweep. #ifdef EIGEN_JACOBI_SVD_BLOCK_SIZE - static constexpr Index kBlockSize = EIGEN_JACOBI_SVD_BLOCK_SIZE; + static constexpr Index kDefaultBlockSize = EIGEN_JACOBI_SVD_BLOCK_SIZE; #else - static constexpr Index kBlockSize = 32; + static constexpr Index kDefaultBlockSize = 32; #endif - Matrix m_blockBuffer; + + // Use the lower of the default block size and static maximum matrix dimensions. + static constexpr Index kBlockSize = internal::min_size_prefer_fixed(kDefaultBlockSize, MaxDiagSizeAtCompileTime); }; template @@ -900,6 +902,14 @@ EIGEN_DONT_INLINE bool JacobiSVD::blocked_sweep(RealScalar const Index n = diagSize(); RealScalar threshold = numext::maxi(considerAsZero, precision * maxDiagEntry); bool notFinished = false; + static constexpr Index kBlockBufferSize = (kBlockSize + 1) * (kBlockSize + 1); + ei_declare_aligned_stack_constructed_variable(Scalar, blockBufferPtr, kBlockBufferSize, 0); + Map, AlignedMax> blockBuffer( + blockBufferPtr, kBlockSize + 1, kBlockSize + 1); + + ei_declare_aligned_stack_constructed_variable(Scalar, accumPtr, kBlockBufferSize, 0); + Map, AlignedMax> accum(accumPtr, kBlockSize + 1, + kBlockSize + 1); for (Index p = 1; p < n; ++p) { Index q = 0; @@ -911,23 +921,21 @@ EIGEN_DONT_INLINE bool JacobiSVD::blocked_sweep(RealScalar for (; q + kBlockSize <= p; q += kBlockSize) { // Buffer = [ W(q:q+k, q:q+k) W(q:q+k, p) ] // [ W(p, q:q+k) W(p, p) ] - m_blockBuffer.template topLeftCorner() = + blockBuffer.template topLeftCorner() = m_workMatrix.template block(q, q); - m_blockBuffer.col(kBlockSize).template head() = m_workMatrix.col(p).template segment(q); - m_blockBuffer.row(kBlockSize).template head() = m_workMatrix.row(p).template segment(q); - m_blockBuffer(kBlockSize, kBlockSize) = m_workMatrix(p, p); + blockBuffer.col(kBlockSize).template head() = m_workMatrix.col(p).template segment(q); + blockBuffer.row(kBlockSize).template head() = m_workMatrix.row(p).template segment(q); + blockBuffer(kBlockSize, kBlockSize) = m_workMatrix(p, p); // Accumulator for left transformations: W <- accum * W. // After processing qq pairs, accum's top-left kBlockSize x kBlockSize // block is lower-triangular (each rotation only mixes row qq with row // kBlockSize, so rows 0..qq-1 are unchanged). - Matrix accum; accum.setIdentity(kBlockSize + 1, kBlockSize + 1); bool blockDirty = false; for (Index qq = 0; qq < kBlockSize; ++qq) { - if (abs(m_blockBuffer.coeff(kBlockSize, qq)) > threshold || - abs(m_blockBuffer.coeff(qq, kBlockSize)) > threshold) { + if (abs(blockBuffer.coeff(kBlockSize, qq)) > threshold || abs(blockBuffer.coeff(qq, kBlockSize)) > threshold) { notFinished = true; blockDirty = true; @@ -945,24 +953,24 @@ EIGEN_DONT_INLINE bool JacobiSVD::blocked_sweep(RealScalar EIGEN_IF_CONSTEXPR(NumTraits::IsComplex) { Scalar z; // nn = ||(w_pp, w_qp)||_2, the norm of the first column of the 2x2 block. - RealScalar nn = sqrt(numext::abs2(m_blockBuffer.coeff(kBlockSize, kBlockSize)) + - numext::abs2(m_blockBuffer.coeff(qq, kBlockSize))); + RealScalar nn = sqrt(numext::abs2(blockBuffer.coeff(kBlockSize, kBlockSize)) + + numext::abs2(blockBuffer.coeff(qq, kBlockSize))); if (numext::is_exactly_zero(nn)) { // First column is zero => block is already upper triangular. - m_blockBuffer.coeffRef(kBlockSize, kBlockSize) = Scalar(0); - m_blockBuffer.coeffRef(qq, kBlockSize) = Scalar(0); + blockBuffer.coeffRef(kBlockSize, kBlockSize) = Scalar(0); + blockBuffer.coeffRef(qq, kBlockSize) = Scalar(0); // Scale rows by z = e^{-i*arg(w)} to make remaining entries real. - if (abs(numext::imag(m_blockBuffer.coeff(kBlockSize, qq))) > considerAsZero) { - z = abs(m_blockBuffer.coeff(kBlockSize, qq)) / m_blockBuffer.coeff(kBlockSize, qq); - m_blockBuffer.row(kBlockSize) *= z; + if (abs(numext::imag(blockBuffer.coeff(kBlockSize, qq))) > considerAsZero) { + z = abs(blockBuffer.coeff(kBlockSize, qq)) / blockBuffer.coeff(kBlockSize, qq); + blockBuffer.row(kBlockSize) *= z; accum.row(kBlockSize) *= z; // accumulate left op if (computeU()) m_matrixU.col(p) *= numext::conj(z); } - if (abs(numext::imag(m_blockBuffer.coeff(qq, qq))) > considerAsZero) { - z = abs(m_blockBuffer.coeff(qq, qq)) / m_blockBuffer.coeff(qq, qq); - m_blockBuffer.row(qq) *= z; + if (abs(numext::imag(blockBuffer.coeff(qq, qq))) > considerAsZero) { + z = abs(blockBuffer.coeff(qq, qq)) / blockBuffer.coeff(qq, qq); + blockBuffer.row(qq) *= z; accum.row(qq) *= z; // accumulate left op if (computeU()) m_matrixU.col(q + qq) *= numext::conj(z); } @@ -972,43 +980,43 @@ EIGEN_DONT_INLINE bool JacobiSVD::blocked_sweep(RealScalar // [-s c] [w_qp] = [0 ] c = ----------, s = ------ // nn nn JacobiRotation rot; - rot.c() = numext::conj(m_blockBuffer.coeff(kBlockSize, kBlockSize)) / nn; - rot.s() = m_blockBuffer.coeff(qq, kBlockSize) / nn; - m_blockBuffer.applyOnTheLeft(kBlockSize, qq, rot); + rot.c() = numext::conj(blockBuffer.coeff(kBlockSize, kBlockSize)) / nn; + rot.s() = blockBuffer.coeff(qq, kBlockSize) / nn; + blockBuffer.applyOnTheLeft(kBlockSize, qq, rot); accum.applyOnTheLeft(kBlockSize, qq, rot); // accumulate left op if (computeU()) m_matrixU.applyOnTheRight(p, q + qq, rot.adjoint()); // Scale column qq by z = e^{-i*arg(w_pq)} to make w_pq real. - if (abs(numext::imag(m_blockBuffer.coeff(kBlockSize, qq))) > considerAsZero) { - z = abs(m_blockBuffer.coeff(kBlockSize, qq)) / m_blockBuffer.coeff(kBlockSize, qq); - m_blockBuffer.col(qq) *= z; + if (abs(numext::imag(blockBuffer.coeff(kBlockSize, qq))) > considerAsZero) { + z = abs(blockBuffer.coeff(kBlockSize, qq)) / blockBuffer.coeff(kBlockSize, qq); + blockBuffer.col(qq) *= z; m_workMatrix.col(q + qq) *= z; // right op: apply directly if (computeV()) m_matrixV.col(q + qq) *= z; } // Scale row qq by z = e^{-i*arg(w_qq)} to make w_qq real. - if (abs(numext::imag(m_blockBuffer.coeff(qq, qq))) > considerAsZero) { - z = abs(m_blockBuffer.coeff(qq, qq)) / m_blockBuffer.coeff(qq, qq); - m_blockBuffer.row(qq) *= z; + if (abs(numext::imag(blockBuffer.coeff(qq, qq))) > considerAsZero) { + z = abs(blockBuffer.coeff(qq, qq)) / blockBuffer.coeff(qq, qq); + blockBuffer.row(qq) *= z; accum.row(qq) *= z; // accumulate left op if (computeU()) m_matrixU.col(q + qq) *= numext::conj(z); } } // Update maxDiagEntry from preconditioning. maxDiagEntry = numext::maxi( - maxDiagEntry, numext::maxi(abs(m_blockBuffer.coeff(kBlockSize, kBlockSize)), - abs(m_blockBuffer.coeff(qq, qq)))); + maxDiagEntry, numext::maxi(abs(blockBuffer.coeff(kBlockSize, kBlockSize)), + abs(blockBuffer.coeff(qq, qq)))); // Check if 2x2 block still needs diagonalizing. RealScalar precondThreshold = numext::maxi(considerAsZero, precision * maxDiagEntry); - doRealSvd = abs(m_blockBuffer.coeff(kBlockSize, qq)) > precondThreshold || - abs(m_blockBuffer.coeff(qq, kBlockSize)) > precondThreshold; + doRealSvd = abs(blockBuffer.coeff(kBlockSize, qq)) > precondThreshold || + abs(blockBuffer.coeff(qq, kBlockSize)) > precondThreshold; } if (doRealSvd) { // Compute real 2x2 SVD: buffer_2x2 = j_left * diag * j_right^T. JacobiRotation j_left, j_right; - internal::real_2x2_jacobi_svd(m_blockBuffer, kBlockSize, qq, &j_left, &j_right); - m_blockBuffer.applyOnTheLeft(kBlockSize, qq, j_left); - m_blockBuffer.applyOnTheRight(kBlockSize, qq, j_right); + internal::real_2x2_jacobi_svd(blockBuffer, kBlockSize, qq, &j_left, &j_right); + blockBuffer.applyOnTheLeft(kBlockSize, qq, j_left); + blockBuffer.applyOnTheRight(kBlockSize, qq, j_right); // Accumulate left rotation for deferred GEMM. accum.applyOnTheLeft(kBlockSize, qq, j_left); @@ -1019,8 +1027,8 @@ EIGEN_DONT_INLINE bool JacobiSVD::blocked_sweep(RealScalar if (computeV()) m_matrixV.applyOnTheRight(p, q + qq, j_right); maxDiagEntry = numext::maxi( - maxDiagEntry, numext::maxi(abs(m_blockBuffer.coeff(kBlockSize, kBlockSize)), - abs(m_blockBuffer.coeff(qq, qq)))); + maxDiagEntry, numext::maxi(abs(blockBuffer.coeff(kBlockSize, kBlockSize)), + abs(blockBuffer.coeff(qq, qq)))); } threshold = numext::maxi(considerAsZero, precision * maxDiagEntry); }