mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Blocked Jacobi SVD sweep with L2-cache-adaptive threshold
libeigen/eigen!2206 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com> Co-authored-by: Rasmus Munk Larsen <rmlarsen@google.com>
This commit is contained in:
@@ -372,8 +372,8 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, Options, true> {
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
static bool run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q, RealScalar& maxDiagEntry) {
|
||||
using std::abs;
|
||||
using std::sqrt;
|
||||
using numext::abs;
|
||||
using numext::sqrt;
|
||||
Scalar z;
|
||||
JacobiRotation<Scalar> rot;
|
||||
RealScalar n = sqrt(numext::abs2(work_matrix.coeff(p, p)) + numext::abs2(work_matrix.coeff(q, p)));
|
||||
@@ -653,6 +653,12 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
|
||||
template <typename Derived>
|
||||
JacobiSVD& compute_impl(const MatrixBase<Derived>& matrix, unsigned int computationOptions);
|
||||
|
||||
// Blocked sweep for the Jacobi SVD (works for both real and complex scalars).
|
||||
// Extracted into a separate EIGEN_DONT_INLINE method to prevent the blocking
|
||||
// code from interfering with the compiler's optimization of the non-blocking
|
||||
// scalar sweep.
|
||||
EIGEN_DONT_INLINE bool blocked_sweep(RealScalar considerAsZero, RealScalar precision, RealScalar& maxDiagEntry);
|
||||
|
||||
protected:
|
||||
using Base::m_computationOptions;
|
||||
using Base::m_computeFullU;
|
||||
@@ -686,6 +692,14 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
|
||||
internal::qr_preconditioner_impl<MatrixType, Options, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols>
|
||||
m_qr_precond_morerows;
|
||||
WorkMatrixType m_workMatrix;
|
||||
|
||||
// Blocking parameters for the Jacobi SVD sweep.
|
||||
#ifdef EIGEN_JACOBI_SVD_BLOCK_SIZE
|
||||
static constexpr Index kBlockSize = EIGEN_JACOBI_SVD_BLOCK_SIZE;
|
||||
#else
|
||||
static constexpr Index kBlockSize = 32;
|
||||
#endif
|
||||
Matrix<Scalar, kBlockSize + 1, kBlockSize + 1> m_blockBuffer;
|
||||
};
|
||||
|
||||
template <typename MatrixType, int Options>
|
||||
@@ -703,7 +717,7 @@ JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute_impl(con
|
||||
EIGEN_STATIC_ASSERT((std::is_same<typename Derived::Scalar, typename MatrixType::Scalar>::value),
|
||||
Input matrix must have the same Scalar type as the BDCSVD object.);
|
||||
|
||||
using std::abs;
|
||||
using numext::abs;
|
||||
|
||||
allocate(matrix.rows(), matrix.cols(), computationOptions);
|
||||
|
||||
@@ -746,37 +760,73 @@ JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute_impl(con
|
||||
while (!finished) {
|
||||
finished = true;
|
||||
|
||||
// do a sweep: for all index pairs (p,q), perform SVD of the corresponding 2x2 sub-matrix
|
||||
// Threshold is hoisted before the double loop; it only needs updating when maxDiagEntry
|
||||
// increases (which only happens inside the rotation block). Since maxDiagEntry is
|
||||
// monotonically non-decreasing, a slightly stale threshold is conservative.
|
||||
RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
|
||||
{
|
||||
// Sweep with optional blocking for large matrices.
|
||||
// Use blocking when the matrix is large enough that individual left rotations
|
||||
// (strided row operations on column-major data) cause significant cache misses.
|
||||
// The threshold is derived from the L2 cache size: blocking becomes worthwhile
|
||||
// when n exceeds sqrt(L2 / 4). We divide by sizeof(float) rather than sizeof(RealScalar)
|
||||
// because the cache miss pattern depends on the number of columns accessed (one cache
|
||||
// line per column), not the scalar size. This also makes the threshold appropriately
|
||||
// more conservative for larger types where GEMM overhead is higher.
|
||||
const Index n = diagSize();
|
||||
#ifdef EIGEN_JACOBI_SVD_BLOCKING_THRESHOLD
|
||||
const Index blockingThreshold = EIGEN_JACOBI_SVD_BLOCKING_THRESHOLD;
|
||||
#else
|
||||
const Index blockingThreshold =
|
||||
static_cast<Index>(numext::sqrt(static_cast<RealScalar>(l2CacheSize() / sizeof(float))));
|
||||
#endif
|
||||
|
||||
for (Index p = 1; p < diagSize(); ++p) {
|
||||
for (Index q = 0; q < p; ++q) {
|
||||
// if this 2x2 sub-matrix is not diagonal already...
|
||||
// notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
|
||||
// keep us iterating forever. Similarly, small denormal numbers are considered zero.
|
||||
if (abs(m_workMatrix.coeff(p, q)) > threshold || abs(m_workMatrix.coeff(q, p)) > threshold) {
|
||||
finished = false;
|
||||
// perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal
|
||||
// the complex to real operation returns true if the updated 2x2 block is not already diagonal
|
||||
if (internal::svd_precondition_2x2_block_to_be_real<MatrixType, Options>::run(m_workMatrix, *this, p, q,
|
||||
maxDiagEntry)) {
|
||||
JacobiRotation<RealScalar> j_left, j_right;
|
||||
internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
|
||||
|
||||
// accumulate resulting Jacobi rotations
|
||||
m_workMatrix.applyOnTheLeft(p, q, j_left);
|
||||
if (computeU()) m_matrixU.applyOnTheRight(p, q, j_left.transpose());
|
||||
|
||||
m_workMatrix.applyOnTheRight(p, q, j_right);
|
||||
if (computeV()) m_matrixV.applyOnTheRight(p, q, j_right);
|
||||
|
||||
// keep track of the largest diagonal coefficient
|
||||
maxDiagEntry = numext::maxi<RealScalar>(
|
||||
maxDiagEntry, numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p, p)), abs(m_workMatrix.coeff(q, q))));
|
||||
threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
|
||||
if (n >= blockingThreshold) {
|
||||
// The blocked sweep is in a separate EIGEN_DONT_INLINE method to prevent
|
||||
// the blocking code from interfering with the compiler's optimization of
|
||||
// the non-blocking scalar sweep below.
|
||||
finished = !blocked_sweep(considerAsZero, precision, maxDiagEntry);
|
||||
}
|
||||
// Non-blocking paths: apply rotations individually. The real and complex
|
||||
// paths are kept separate to avoid any codegen impact from the complex
|
||||
// preconditioner on GCC's optimization of the real inner loop.
|
||||
else
|
||||
EIGEN_IF_CONSTEXPR(NumTraits<Scalar>::IsComplex) {
|
||||
// Complex non-blocking sweep: condition each 2x2 block to be real before diagonalizing.
|
||||
for (Index p = 1; p < n; ++p) {
|
||||
for (Index q = 0; q < p; ++q) {
|
||||
RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
|
||||
if (abs(m_workMatrix.coeff(p, q)) > threshold || abs(m_workMatrix.coeff(q, p)) > threshold) {
|
||||
finished = false;
|
||||
if (internal::svd_precondition_2x2_block_to_be_real<MatrixType, Options>::run(m_workMatrix, *this, p, q,
|
||||
maxDiagEntry)) {
|
||||
JacobiRotation<RealScalar> j_left, j_right;
|
||||
internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
|
||||
m_workMatrix.applyOnTheLeft(p, q, j_left);
|
||||
if (computeU()) m_matrixU.applyOnTheRight(p, q, j_left.transpose());
|
||||
m_workMatrix.applyOnTheRight(p, q, j_right);
|
||||
if (computeV()) m_matrixV.applyOnTheRight(p, q, j_right);
|
||||
maxDiagEntry = numext::maxi<RealScalar>(
|
||||
maxDiagEntry,
|
||||
numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p, p)), abs(m_workMatrix.coeff(q, q))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Real non-blocking sweep: diagonalize each 2x2 block directly.
|
||||
RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
|
||||
for (Index p = 1; p < n; ++p) {
|
||||
for (Index q = 0; q < p; ++q) {
|
||||
if (abs(m_workMatrix.coeff(p, q)) > threshold || abs(m_workMatrix.coeff(q, p)) > threshold) {
|
||||
finished = false;
|
||||
JacobiRotation<RealScalar> j_left, j_right;
|
||||
internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
|
||||
m_workMatrix.applyOnTheLeft(p, q, j_left);
|
||||
if (computeU()) m_matrixU.applyOnTheRight(p, q, j_left.transpose());
|
||||
m_workMatrix.applyOnTheRight(p, q, j_right);
|
||||
if (computeV()) m_matrixV.applyOnTheRight(p, q, j_right);
|
||||
maxDiagEntry = numext::maxi<RealScalar>(
|
||||
maxDiagEntry, numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p, p)), abs(m_workMatrix.coeff(q, q))));
|
||||
threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -826,6 +876,210 @@ JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute_impl(con
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Blocked Jacobi SVD sweep for both real and complex scalar types. For large n,
// applying left rotations (row operations on column-major data) causes cache
// misses due to strided access. To mitigate this, we accumulate kBlockSize left
// rotations into a small dense matrix and apply them via a single GEMM to the
// contiguous row block q..q+kBlockSize-1 and the (possibly distant) row p.
// Right rotations and column scalings act on columns (contiguous in column-major)
// and are applied individually.
//
// For complex types, the 2x2 preconditioning (making the block real) involves
// complex left rotations and row scalings, which are also accumulated into the
// block matrix. Column scalings from preconditioning are applied directly.
//
// The accumulated rotation matrix has lower-triangular structure in its top-left
// kBlockSize x kBlockSize corner, which we exploit with triangularView.
//
// NOTE(review): deferring the left (row) ops while applying right (column) ops
// immediately relies on row and column operations commuting:
// accum * (W * R) == (accum * W) * R.
//
// Returns true if any off-diagonal element exceeded the threshold (i.e. sweep
// is not yet converged).
template <typename MatrixType, int Options>
EIGEN_DONT_INLINE bool JacobiSVD<MatrixType, Options>::blocked_sweep(RealScalar considerAsZero, RealScalar precision,
                                                                     RealScalar& maxDiagEntry) {
  using numext::abs;
  using numext::sqrt;
  const Index n = diagSize();
  // Convergence threshold; maxDiagEntry only grows during the sweep, so this is
  // refreshed after each rotation and is always >= considerAsZero.
  RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
  bool notFinished = false;

  for (Index p = 1; p < n; ++p) {
    Index q = 0;

    // Blocked loop: process kBlockSize pairs (p,q+qq) for qq=0..kBlockSize-1.
    // We extract the relevant (kBlockSize+1) x (kBlockSize+1) submatrix of W
    // into a small buffer, compute all rotations on the buffer, accumulate the
    // left transformations into `accum`, and apply them in one GEMM at the end.
    for (; q + kBlockSize <= p; q += kBlockSize) {
      // Buffer = [ W(q:q+k, q:q+k)  W(q:q+k, p) ]
      //          [ W(p, q:q+k)      W(p, p)     ]
      // Buffer index kBlockSize corresponds to W's row/column p;
      // buffer index qq corresponds to W's row/column q+qq.
      m_blockBuffer.template topLeftCorner<kBlockSize, kBlockSize>() =
          m_workMatrix.template block<kBlockSize, kBlockSize>(q, q);
      m_blockBuffer.col(kBlockSize).template head<kBlockSize>() = m_workMatrix.col(p).template segment<kBlockSize>(q);
      m_blockBuffer.row(kBlockSize).template head<kBlockSize>() = m_workMatrix.row(p).template segment<kBlockSize>(q);
      m_blockBuffer(kBlockSize, kBlockSize) = m_workMatrix(p, p);

      // Accumulator for left transformations: W <- accum * W.
      // After processing qq pairs, accum's top-left kBlockSize x kBlockSize
      // block is lower-triangular (each rotation only mixes row qq with row
      // kBlockSize, so rows 0..qq-1 are unchanged).
      Matrix<Scalar, kBlockSize + 1, kBlockSize + 1> accum;
      accum.setIdentity(kBlockSize + 1, kBlockSize + 1);
      // Tracks whether any rotation was generated, so the final GEMM can be
      // skipped for blocks that were already converged.
      bool blockDirty = false;

      for (Index qq = 0; qq < kBlockSize; ++qq) {
        if (abs(m_blockBuffer.coeff(kBlockSize, qq)) > threshold ||
            abs(m_blockBuffer.coeff(qq, kBlockSize)) > threshold) {
          notFinished = true;
          blockDirty = true;

          // Complex preconditioning: transform the 2x2 block
          //   [w_pp w_pq] = [buffer(kBlockSize, kBlockSize) buffer(kBlockSize, qq)]
          //   [w_qp w_qq]   [buffer(qq, kBlockSize)         buffer(qq, qq)        ]
          // to have real entries via unitary row/column operations, so
          // real_2x2_jacobi_svd can be applied.
          //
          // Left operations (complex rotation, row scaling by e^{i*theta}) are
          // accumulated into `accum` for deferred GEMM application.
          // Right operations (column scaling) are applied directly since column
          // ops are contiguous in column-major layout.
          bool doRealSvd = true;
          EIGEN_IF_CONSTEXPR(NumTraits<Scalar>::IsComplex) {
            Scalar z;
            // nn = ||(w_pp, w_qp)||_2, the norm of the first column of the 2x2 block.
            RealScalar nn = sqrt(numext::abs2(m_blockBuffer.coeff(kBlockSize, kBlockSize)) +
                                 numext::abs2(m_blockBuffer.coeff(qq, kBlockSize)));

            if (numext::is_exactly_zero(nn)) {
              // First column is zero => block is already upper triangular.
              m_blockBuffer.coeffRef(kBlockSize, kBlockSize) = Scalar(0);
              m_blockBuffer.coeffRef(qq, kBlockSize) = Scalar(0);

              // Scale rows by z = e^{-i*arg(w)} to make remaining entries real.
              if (abs(numext::imag(m_blockBuffer.coeff(kBlockSize, qq))) > considerAsZero) {
                z = abs(m_blockBuffer.coeff(kBlockSize, qq)) / m_blockBuffer.coeff(kBlockSize, qq);
                m_blockBuffer.row(kBlockSize) *= z;
                accum.row(kBlockSize) *= z;  // accumulate left op
                if (computeU()) m_matrixU.col(p) *= numext::conj(z);
              }
              if (abs(numext::imag(m_blockBuffer.coeff(qq, qq))) > considerAsZero) {
                z = abs(m_blockBuffer.coeff(qq, qq)) / m_blockBuffer.coeff(qq, qq);
                m_blockBuffer.row(qq) *= z;
                accum.row(qq) *= z;  // accumulate left op
                if (computeU()) m_matrixU.col(q + qq) *= numext::conj(z);
              }
            } else {
              // Apply complex Givens rotation to zero out w_qp:
              //   [c  s] [w_pp]   [nn]       conj(w_pp)       w_qp
              //   [-s c] [w_qp] = [0 ]   c = ----------,  s = ------
              //                                 nn               nn
              JacobiRotation<Scalar> rot;
              rot.c() = numext::conj(m_blockBuffer.coeff(kBlockSize, kBlockSize)) / nn;
              rot.s() = m_blockBuffer.coeff(qq, kBlockSize) / nn;
              m_blockBuffer.applyOnTheLeft(kBlockSize, qq, rot);
              accum.applyOnTheLeft(kBlockSize, qq, rot);  // accumulate left op
              if (computeU()) m_matrixU.applyOnTheRight(p, q + qq, rot.adjoint());

              // Scale column qq by z = e^{-i*arg(w_pq)} to make w_pq real.
              if (abs(numext::imag(m_blockBuffer.coeff(kBlockSize, qq))) > considerAsZero) {
                z = abs(m_blockBuffer.coeff(kBlockSize, qq)) / m_blockBuffer.coeff(kBlockSize, qq);
                m_blockBuffer.col(qq) *= z;
                m_workMatrix.col(q + qq) *= z;  // right op: apply directly
                if (computeV()) m_matrixV.col(q + qq) *= z;
              }
              // Scale row qq by z = e^{-i*arg(w_qq)} to make w_qq real.
              if (abs(numext::imag(m_blockBuffer.coeff(qq, qq))) > considerAsZero) {
                z = abs(m_blockBuffer.coeff(qq, qq)) / m_blockBuffer.coeff(qq, qq);
                m_blockBuffer.row(qq) *= z;
                accum.row(qq) *= z;  // accumulate left op
                if (computeU()) m_matrixU.col(q + qq) *= numext::conj(z);
              }
            }
            // Update maxDiagEntry from preconditioning.
            maxDiagEntry = numext::maxi<RealScalar>(
                maxDiagEntry, numext::maxi<RealScalar>(abs(m_blockBuffer.coeff(kBlockSize, kBlockSize)),
                                                       abs(m_blockBuffer.coeff(qq, qq))));
            // Check if 2x2 block still needs diagonalizing.
            RealScalar precondThreshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
            doRealSvd = abs(m_blockBuffer.coeff(kBlockSize, qq)) > precondThreshold ||
                        abs(m_blockBuffer.coeff(qq, kBlockSize)) > precondThreshold;
          }

          if (doRealSvd) {
            // Compute real 2x2 SVD: buffer_2x2 = j_left * diag * j_right^T.
            JacobiRotation<RealScalar> j_left, j_right;
            internal::real_2x2_jacobi_svd(m_blockBuffer, kBlockSize, qq, &j_left, &j_right);
            m_blockBuffer.applyOnTheLeft(kBlockSize, qq, j_left);
            m_blockBuffer.applyOnTheRight(kBlockSize, qq, j_right);

            // Accumulate left rotation for deferred GEMM.
            accum.applyOnTheLeft(kBlockSize, qq, j_left);

            // Right rotation is a column op (contiguous): apply directly.
            m_workMatrix.applyOnTheRight(p, q + qq, j_right);
            if (computeU()) m_matrixU.applyOnTheRight(p, q + qq, j_left.transpose());
            if (computeV()) m_matrixV.applyOnTheRight(p, q + qq, j_right);

            // Keep track of the largest diagonal coefficient seen so far.
            maxDiagEntry = numext::maxi<RealScalar>(
                maxDiagEntry, numext::maxi<RealScalar>(abs(m_blockBuffer.coeff(kBlockSize, kBlockSize)),
                                                       abs(m_blockBuffer.coeff(qq, qq))));
          }
          // Refresh threshold; maxDiagEntry can only have grown above.
          threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
        }
      }

      // Apply accumulated left rotations: W <- accum * W, via GEMM.
      // When p == q + kBlockSize, all kBlockSize+1 rows are contiguous.
      // Otherwise, rows q..q+k-1 and row p are non-adjacent; we split:
      //   [Mq]    [L11 l12] [Mq]
      //   [Mp] <- [l21 l22] [Mp]
      // L11 is lower-triangular (exploited via triangularView).
      if (blockDirty) {
        if (p == q + kBlockSize) {
          m_workMatrix.template middleRows<kBlockSize + 1>(q) =
              accum * m_workMatrix.template middleRows<kBlockSize + 1>(q);
        } else {
          const auto L11 = accum.template topLeftCorner<kBlockSize, kBlockSize>();
          const auto l12 = accum.col(kBlockSize).template head<kBlockSize>();
          const auto l21 = accum.row(kBlockSize).template head<kBlockSize>();
          const Scalar l22 = accum(kBlockSize, kBlockSize);
          auto Mq = m_workMatrix.template middleRows<kBlockSize>(q);
          auto Mp = m_workMatrix.row(p);
          // Row p is read by both updates, so save it before overwriting.
          Matrix<Scalar, 1, Dynamic> Mp_save = Mp;
          Mp.noalias() = l21 * Mq + l22 * Mp_save;
          Mq = L11.template triangularView<Lower>() * Mq + l12 * Mp_save;
        }
      }
    }

    // Scalar loop for remaining pairs after blocked processing
    // (the < kBlockSize pairs left over when p is not a multiple of kBlockSize away).
    for (; q < p; ++q) {
      if (abs(m_workMatrix.coeff(p, q)) > threshold || abs(m_workMatrix.coeff(q, p)) > threshold) {
        notFinished = true;

        // For complex scalars, first make the 2x2 block real; run() returns
        // true if the block still needs diagonalizing afterwards.
        bool doRealSvd = true;
        EIGEN_IF_CONSTEXPR(NumTraits<Scalar>::IsComplex) {
          doRealSvd = internal::svd_precondition_2x2_block_to_be_real<MatrixType, Options>::run(m_workMatrix, *this, p,
                                                                                                q, maxDiagEntry);
        }

        if (doRealSvd) {
          JacobiRotation<RealScalar> j_left, j_right;
          internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
          m_workMatrix.applyOnTheLeft(p, q, j_left);
          if (computeU()) m_matrixU.applyOnTheRight(p, q, j_left.transpose());
          m_workMatrix.applyOnTheRight(p, q, j_right);
          if (computeV()) m_matrixV.applyOnTheRight(p, q, j_right);
          maxDiagEntry = numext::maxi<RealScalar>(
              maxDiagEntry, numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p, p)), abs(m_workMatrix.coeff(q, q))));
        }
        threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
      }
    }
  }

  return notFinished;
}
|
||||
|
||||
/** \svd_module
|
||||
*
|
||||
* \return the singular value decomposition of \c *this computed by two-sided
|
||||
|
||||
62
benchmarks/SVD/bench_jacobi_sweep.cpp
Normal file
62
benchmarks/SVD/bench_jacobi_sweep.cpp
Normal file
@@ -0,0 +1,62 @@
|
||||
// Standalone JacobiSVD benchmark for block-size and threshold sweeps.
|
||||
// Compile-time parameters via -D flags:
|
||||
// BLOCK_SIZE, BLOCKING_THRESHOLD (when not defined, Eigen's built-in defaults
// apply: block size 32 and an L2-cache-derived blocking threshold)
|
||||
//
|
||||
// Build example:
|
||||
// g++ -O3 -DNDEBUG -march=native -DBLOCK_SIZE=16 -DBLOCKING_THRESHOLD=128
|
||||
// -I../.. bench_jacobi_sweep.cpp -lbenchmark -lbenchmark_main -lpthread
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
// Override block size before including Eigen headers.
|
||||
#ifdef BLOCK_SIZE
|
||||
#define EIGEN_JACOBI_SVD_BLOCK_SIZE BLOCK_SIZE
|
||||
#endif
|
||||
#ifdef BLOCKING_THRESHOLD
|
||||
#define EIGEN_JACOBI_SVD_BLOCKING_THRESHOLD BLOCKING_THRESHOLD
|
||||
#endif
|
||||
|
||||
#include <Eigen/Dense>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
template <typename Scalar>
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
|
||||
template <typename SVD>
|
||||
EIGEN_DONT_INLINE void do_compute(SVD& svd, const typename SVD::MatrixType& A) {
|
||||
svd.compute(A);
|
||||
}
|
||||
|
||||
template <typename Scalar, int Options>
|
||||
static void BM_JacobiSVD(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Mat<Scalar> A = Mat<Scalar>::Random(n, n);
|
||||
JacobiSVD<Mat<Scalar>, Options> svd(n, n);
|
||||
for (auto _ : state) {
|
||||
do_compute(svd, A);
|
||||
benchmark::DoNotOptimize(svd.singularValues().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
static void Sizes(::benchmark::Benchmark* b) {
|
||||
for (int s : {32, 64, 128, 192, 256, 384, 512}) b->Args({s});
|
||||
}
|
||||
|
||||
// float ValuesOnly
|
||||
BENCHMARK(BM_JacobiSVD<float, 0>)->Apply(Sizes)->Name("Jacobi_float_VO");
|
||||
// float ThinUV
|
||||
BENCHMARK(BM_JacobiSVD<float, ComputeThinU | ComputeThinV>)->Apply(Sizes)->Name("Jacobi_float_UV");
|
||||
// double ValuesOnly
|
||||
BENCHMARK(BM_JacobiSVD<double, 0>)->Apply(Sizes)->Name("Jacobi_double_VO");
|
||||
// double ThinUV
|
||||
BENCHMARK(BM_JacobiSVD<double, ComputeThinU | ComputeThinV>)->Apply(Sizes)->Name("Jacobi_double_UV");
|
||||
// complex<float> ValuesOnly
|
||||
BENCHMARK(BM_JacobiSVD<std::complex<float>, 0>)->Apply(Sizes)->Name("Jacobi_cfloat_VO");
|
||||
// complex<float> ThinUV
|
||||
BENCHMARK((BM_JacobiSVD<std::complex<float>, ComputeThinU | ComputeThinV>))->Apply(Sizes)->Name("Jacobi_cfloat_UV");
|
||||
// complex<double> ValuesOnly
|
||||
BENCHMARK(BM_JacobiSVD<std::complex<double>, 0>)->Apply(Sizes)->Name("Jacobi_cdouble_VO");
|
||||
// complex<double> ThinUV
|
||||
BENCHMARK((BM_JacobiSVD<std::complex<double>, ComputeThinU | ComputeThinV>))->Apply(Sizes)->Name("Jacobi_cdouble_UV");
|
||||
Reference in New Issue
Block a user