Migrate Eigen benchmarks to the Google benchmark framework

libeigen/eigen!2132

Closes #3025

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
Rasmus Munk Larsen
2026-02-17 20:51:36 -08:00
parent 740cac97b4
commit 3108f6360e
23 changed files with 1632 additions and 0 deletions

60
benchmarks/CMakeLists.txt Normal file
View File

@@ -0,0 +1,60 @@
cmake_minimum_required(VERSION 3.10)
project(EigenBenchmarks CXX)
find_package(benchmark REQUIRED)
# Eigen is a header-only library; find it relative to this directory.
set(EIGEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")
# Helper: add a Google Benchmark target.
# eigen_add_benchmark(name source [LIBRARIES lib1 lib2 ...] [DEFINITIONS def1 def2 ...])
function(eigen_add_benchmark name source)
  cmake_parse_arguments(BENCH "" "" "LIBRARIES;DEFINITIONS" ${ARGN})
  add_executable(${name} ${source})
  target_include_directories(${name} PRIVATE ${EIGEN_SOURCE_DIR})
  target_link_libraries(${name} PRIVATE benchmark::benchmark benchmark::benchmark_main)
  if(BENCH_LIBRARIES)
    target_link_libraries(${name} PRIVATE ${BENCH_LIBRARIES})
  endif()
  # Benchmarks must run optimized with assertions disabled. NDEBUG is a
  # preprocessor definition, not a flag; -O3 is GCC/Clang-only, so use the
  # MSVC equivalent there instead of passing an unknown option.
  target_compile_definitions(${name} PRIVATE NDEBUG)
  if(MSVC)
    target_compile_options(${name} PRIVATE /O2)
  else()
    target_compile_options(${name} PRIVATE -O3)
  endif()
  if(BENCH_DEFINITIONS)
    target_compile_definitions(${name} PRIVATE ${BENCH_DEFINITIONS})
  endif()
endfunction()
# --- Dense benchmarks ---
eigen_add_benchmark(bench_cholesky benchCholesky.cpp)
eigen_add_benchmark(bench_eigensolver benchEigenSolver.cpp)
eigen_add_benchmark(bench_fft benchFFT.cpp)
eigen_add_benchmark(bench_geometry_transforms benchGeometry.cpp)
eigen_add_benchmark(bench_vecadd benchVecAdd.cpp)
eigen_add_benchmark(bench_gemm benchGemm.cpp)
eigen_add_benchmark(bench_gemm_double benchGemm.cpp DEFINITIONS SCALAR=double)
eigen_add_benchmark(bench_gemv benchGemv.cpp)
eigen_add_benchmark(bench_move_semantics bench_move_semantics.cpp)
eigen_add_benchmark(bench_reverse bench_reverse.cpp)
eigen_add_benchmark(bench_dense_solvers dense_solvers.cpp)
eigen_add_benchmark(bench_trsm bench_trsm.cpp)
eigen_add_benchmark(bench_svd bench_svd.cpp)
eigen_add_benchmark(bench_eig33 eig33.cpp)
eigen_add_benchmark(bench_geometry geometry.cpp)
eigen_add_benchmark(bench_quatmul quatmul.cpp)
# --- Sparse benchmarks ---
eigen_add_benchmark(bench_sparse_dense_product sparse_dense_product.cpp)
eigen_add_benchmark(bench_sparse_product sparse_product.cpp)
eigen_add_benchmark(bench_sparse_transpose sparse_transpose.cpp)
# --- GEMM blocking parameter sweep ---
eigen_add_benchmark(bench_blocking_sizes benchmark_blocking_sizes.cpp)
# --- AOCL benchmark (AOCL optional) ---
eigen_add_benchmark(bench_aocl benchmark_aocl.cpp)
# Optional: BLAS GEMM comparison benchmark (requires CBLAS)
find_package(BLAS QUIET)
if(BLAS_FOUND)
  eigen_add_benchmark(bench_blas_gemm benchBlasGemm.cpp
    LIBRARIES ${BLAS_LIBRARIES}
    DEFINITIONS HAVE_BLAS)
endif()

View File

@@ -0,0 +1,73 @@
// Benchmark: Eigen GEMM vs CBLAS GEMM
// Requires CBLAS: compile with -DHAVE_BLAS and link -lcblas
//
// Based on bench/benchBlasGemm.cpp
#include <type_traits>

#include <benchmark/benchmark.h>

#include <Eigen/Core>
using namespace Eigen;
#ifndef SCALAR
#define SCALAR float
#endif
typedef SCALAR Scalar;
typedef Matrix<Scalar, Dynamic, Dynamic> MyMatrix;
// Time Eigen's dense GEMM accumulation c += a * b for dynamic-size matrices.
static void BM_EigenGemm(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const int depth = state.range(2);
  MyMatrix lhs = MyMatrix::Random(rows, depth);
  MyMatrix rhs = MyMatrix::Random(depth, cols);
  MyMatrix dst = MyMatrix::Random(rows, cols);
  for (auto _ : state) {
    dst.noalias() += lhs * rhs;
    benchmark::DoNotOptimize(dst.data());
  }
  // 2*M*N*K floating-point operations per product.
  state.counters["GFLOPS"] = benchmark::Counter(2.0 * rows * cols * depth,
                                                benchmark::Counter::kIsIterationInvariantRate,
                                                benchmark::Counter::kIs1000);
}
#ifdef HAVE_BLAS
extern "C" {
#include <cblas.h>
}
// CBLAS comparison benchmark for the same accumulating GEMM.
//
// BUGFIX: the previous dispatch used `#ifdef _FLOAT`, a macro nothing here
// defines — the scalar type is selected via the SCALAR macro (default float),
// so a float build would have called cblas_dgemm with float data. Dispatch on
// the actual Scalar type instead (same pattern as benchGemm.cpp).
static void BM_CblasGemm(benchmark::State& state) {
  int M = state.range(0);
  int N = state.range(1);
  int K = state.range(2);
  MyMatrix a = MyMatrix::Random(M, K);
  MyMatrix b = MyMatrix::Random(K, N);
  MyMatrix c = MyMatrix::Random(M, N);
  Scalar alpha = 1, beta = 1;  // beta = 1: accumulate into c, like the Eigen variant
  for (auto _ : state) {
    if constexpr (std::is_same_v<Scalar, float>) {
      cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, a.data(), M, b.data(), K, beta, c.data(),
                  M);
    } else {
      cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, a.data(), M, b.data(), K, beta, c.data(),
                  M);
    }
    benchmark::DoNotOptimize(c.data());
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * M * N * K, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
#endif
// Square problem sizes from 32 to 2048 plus two rectangular shapes.
static void GemmSizes(::benchmark::Benchmark* b) {
  const int squares[] = {32, 64, 128, 256, 512, 1024, 2048};
  for (int side : squares) b->Args({side, side, side});
  // Rectangular
  b->Args({1000, 100, 1000});
  b->Args({100, 1000, 100});
}
BENCHMARK(BM_EigenGemm)->Apply(GemmSizes);
#ifdef HAVE_BLAS
BENCHMARK(BM_CblasGemm)->Apply(GemmSizes);
#endif

View File

@@ -0,0 +1,48 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <Eigen/Cholesky>
using namespace Eigen;
typedef float Scalar;
// Time the LDLT (Cholesky-without-square-roots) factorization of an SPD matrix.
static void BM_LDLT(benchmark::State& state) {
  const int n = state.range(0);
  typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
  typedef Matrix<Scalar, Dynamic, Dynamic> SquareMatrixType;
  MatrixType rnd = MatrixType::Random(n, n);
  // rnd * rnd^H is symmetric positive semi-definite — a valid LDLT input.
  SquareMatrixType spd = rnd * rnd.adjoint();
  const int row = internal::random<int>(0, n - 1);
  const int col = internal::random<int>(0, n - 1);
  Scalar sink = 0;
  for (auto _ : state) {
    LDLT<SquareMatrixType> decomp(spd);
    // Read one factor entry so the factorization cannot be optimized away.
    sink += decomp.matrixL().coeff(row, col);
    benchmark::DoNotOptimize(sink);
  }
}
BENCHMARK(BM_LDLT)->RangeMultiplier(2)->Range(4, 1500);
// Time the LLT (standard Cholesky) factorization of an SPD matrix and report
// an approximate flop rate.
static void BM_LLT(benchmark::State& state) {
int n = state.range(0);
typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
typedef Matrix<Scalar, Dynamic, Dynamic> SquareMatrixType;
MatrixType a = MatrixType::Random(n, n);
// a * a^H is symmetric positive semi-definite — a valid LLT input.
SquareMatrixType covMat = a * a.adjoint();
// Reading a random factor coefficient below defeats dead-code elimination.
int r = internal::random<int>(0, n - 1);
int c = internal::random<int>(0, n - 1);
Scalar acc = 0;
for (auto _ : state) {
LLT<SquareMatrixType> chol(covMat);
acc += chol.matrixL().coeff(r, c);
benchmark::DoNotOptimize(acc);
}
// Per-column flop estimate for the factorization, summed over all columns.
// NOTE(review): presumably this models Cholesky's ~n^3/3 flops — confirm the
// formula matches the intended cost model before relying on the GFLOPS value.
double cost = 0;
for (int j = 0; j < n; ++j) {
int rem = std::max(n - j - 1, 0);
cost += 2 * (rem * j + rem + j);
}
state.counters["GFLOPS"] =
benchmark::Counter(cost, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
BENCHMARK(BM_LLT)->RangeMultiplier(2)->Range(4, 1500);

View File

@@ -0,0 +1,42 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <Eigen/QR>
#include <Eigen/Eigenvalues>
using namespace Eigen;
typedef float Scalar;
// Time SelfAdjointEigenSolver::compute on an SPD matrix; the solver is
// constructed once so per-iteration cost is compute() only.
static void BM_SelfAdjointEigenSolver(benchmark::State& state) {
  const int n = state.range(0);
  typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
  MatrixType rnd = MatrixType::Random(n, n);
  MatrixType covMat = rnd * rnd.adjoint();
  const int row = internal::random<int>(0, n - 1);
  const int col = internal::random<int>(0, n - 1);
  Scalar sink = 0;
  SelfAdjointEigenSolver<MatrixType> solver(covMat);
  for (auto _ : state) {
    solver.compute(covMat);
    // Touch one eigenvector entry so the solve cannot be optimized away.
    sink += solver.eigenvectors().coeff(row, col);
    benchmark::DoNotOptimize(sink);
  }
}
BENCHMARK(BM_SelfAdjointEigenSolver)->RangeMultiplier(2)->Range(4, 512);
// Time the general (non-self-adjoint) EigenSolver on the same SPD input so
// the two solvers are directly comparable.
static void BM_EigenSolver(benchmark::State& state) {
  const int n = state.range(0);
  typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
  MatrixType rnd = MatrixType::Random(n, n);
  MatrixType covMat = rnd * rnd.adjoint();
  const int row = internal::random<int>(0, n - 1);
  const int col = internal::random<int>(0, n - 1);
  Scalar sink = 0;
  EigenSolver<MatrixType> solver(covMat);
  for (auto _ : state) {
    solver.compute(covMat);
    // Eigenvectors are complex here; fold the squared magnitude into the sink.
    sink += std::norm(solver.eigenvectors().coeff(row, col));
    benchmark::DoNotOptimize(sink);
  }
}
BENCHMARK(BM_EigenSolver)->RangeMultiplier(2)->Range(4, 512);

43
benchmarks/benchFFT.cpp Normal file
View File

@@ -0,0 +1,43 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <unsupported/Eigen/FFT>
#include <complex>
#include <vector>
using namespace Eigen;
// Time a forward or inverse FFT for real or complex scalars, optionally with
// the Unscaled / HalfSpectrum flags. Reports ~5*N*log2(N) flops per
// transform, halved for real input.
template <typename T, bool Forward, bool Unscaled = false, bool HalfSpec = false>
static void BM_FFT(benchmark::State& state) {
  typedef typename NumTraits<T>::Real ScalarType;
  typedef std::complex<ScalarType> Complex;
  const int nfft = state.range(0);
  std::vector<T> inbuf(nfft, T(0));
  std::vector<Complex> outbuf(nfft);
  FFT<ScalarType> fft;
  if (Unscaled) fft.SetFlag(fft.Unscaled);
  if (HalfSpec) fft.SetFlag(fft.HalfSpectrum);
  // Warm-up transform so any plan setup happens outside the timed loop.
  fft.fwd(outbuf, inbuf);
  for (auto _ : state) {
    if (Forward) {
      fft.fwd(outbuf, inbuf);
    } else {
      fft.inv(inbuf, outbuf);
    }
    benchmark::DoNotOptimize(outbuf.data());
    benchmark::DoNotOptimize(inbuf.data());
  }
  double mflops_per_iter = 5.0 * nfft * std::log2(static_cast<double>(nfft));
  if (!NumTraits<T>::IsComplex) mflops_per_iter /= 2;
  state.counters["MFLOPS"] =
      benchmark::Counter(mflops_per_iter, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
BENCHMARK(BM_FFT<std::complex<float>, true>)->Arg(1024)->Arg(4096);
BENCHMARK(BM_FFT<std::complex<float>, false>)->Arg(1024)->Arg(4096);
BENCHMARK(BM_FFT<float, true>)->Arg(1024)->Arg(4096);
BENCHMARK(BM_FFT<float, false>)->Arg(1024)->Arg(4096);
BENCHMARK(BM_FFT<std::complex<double>, true>)->Arg(1024)->Arg(4096);
BENCHMARK(BM_FFT<std::complex<double>, false>)->Arg(1024)->Arg(4096);
BENCHMARK(BM_FFT<double, true>)->Arg(1024)->Arg(4096);
BENCHMARK(BM_FFT<double, false>)->Arg(1024)->Arg(4096);

81
benchmarks/benchGemm.cpp Normal file
View File

@@ -0,0 +1,81 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
#ifndef SCALAR
#define SCALAR float
#endif
typedef SCALAR Scalar;
typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
// Out-of-line GEMM kernel: accumulates a*b into c. EIGEN_DONT_INLINE pins the
// call boundary so the product cannot be inlined and hoisted out of the timed
// loop by the optimizer.
template <typename A, typename B, typename C>
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) {
c.noalias() += a * b;
}
// Time c = a*b; c is zeroed each pass so every iteration performs an
// identical accumulation. Reports 2*m*n*p flops per iteration.
static void BM_EigenGemm(benchmark::State& state) {
  const int m = state.range(0);
  const int n = state.range(1);
  const int p = state.range(2);
  Mat a = Mat::Random(m, p);
  Mat b = Mat::Random(p, n);
  Mat c = Mat::Zero(m, n);
  for (auto _ : state) {
    c.setZero();
    gemm(a, b, c);
    benchmark::DoNotOptimize(c.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * m * n * p, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// Square sizes spanning cache-resident through large problems, plus a few
// non-square shapes stressing different packing paths.
static void GemmSizes(::benchmark::Benchmark* b) {
  const int squares[] = {8, 16, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 384, 448, 512, 768, 1024, 1536, 2048};
  for (int side : squares) b->Args({side, side, side});
  // Non-square sizes
  b->Args({64, 64, 1024});
  b->Args({1024, 64, 64});
  b->Args({64, 1024, 64});
  b->Args({256, 256, 1024});
  b->Args({1024, 256, 256});
}
BENCHMARK(BM_EigenGemm)->Apply(GemmSizes);
#ifdef HAVE_BLAS
extern "C" {
#include <Eigen/src/misc/blas.h>
}
// Reference Fortran-BLAS GEMM (prototypes from Eigen's blas.h) for direct
// comparison with BM_EigenGemm. beta = 1 accumulates into the freshly zeroed
// c, matching the Eigen variant's setZero() + accumulate pattern.
// Cleanup: dropped the unused `zero` local from the original.
static void BM_BlasGemm(benchmark::State& state) {
  // Dimensions stay non-const: the Fortran ABI takes them by address.
  int m = state.range(0);
  int n = state.range(1);
  int p = state.range(2);
  Mat a(m, p);
  a.setRandom();
  Mat b(p, n);
  b.setRandom();
  Mat c = Mat::Zero(m, n);
  char notrans = 'N';
  Scalar one = 1;  // serves as both alpha and beta
  for (auto _ : state) {
    c.setZero();
    if constexpr (std::is_same_v<Scalar, float>) {
      sgemm_(&notrans, &notrans, &m, &n, &p, &one, a.data(), &m, b.data(), &p, &one, c.data(), &m);
    } else {
      dgemm_(&notrans, &notrans, &m, &n, &p, &one, a.data(), &m, b.data(), &p, &one, c.data(), &m);
    }
    benchmark::DoNotOptimize(c.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * m * n * p, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
BENCHMARK(BM_BlasGemm)->Apply(GemmSizes);
#endif

160
benchmarks/benchGemv.cpp Normal file
View File

@@ -0,0 +1,160 @@
// Benchmark for dense general matrix-vector multiplication (GEMV).
//
// Tests performance of y += op(A) * x for various matrix sizes, aspect ratios,
// scalar types, and operation variants (transpose, conjugate, adjoint).
//
// The Eigen GEMV kernel (Eigen/src/Core/products/GeneralMatrixVector.h) has
// two main specializations:
// - ColMajor kernel: used for y += A * x with column-major A.
// Processes vertical panels, vectorizes along rows.
// - RowMajor kernel: used for y += A^T * x with column-major A.
// Processes groups of rows, vectorizes the dot product along columns.
//
// For complex scalars, conjugation flags (ConjugateLhs, ConjugateRhs) select
// additional code paths within each kernel via conj_helper.
//
// Operation mapping (for column-major stored A):
// Gemv y += A * x -> ColMajor kernel, no conjugation
// GemvTrans y += A^T * x -> RowMajor kernel, no conjugation
// GemvConj y += conj(A) * x -> ColMajor kernel, ConjugateLhs=true
// GemvAdj y += A^H * x -> RowMajor kernel, ConjugateLhs=true
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// ---------- Benchmark helpers ----------
// GEMV flop count: 2*m*n for real, 8*m*n for complex.
// Flops for one GEMV pass: a complex multiply-add costs 8 real flops, a real
// one costs 2.
template <typename Scalar>
double gemvFlops(Index m, Index n) {
  const double flops_per_coeff = NumTraits<Scalar>::IsComplex ? 8.0 : 2.0;
  return flops_per_coeff * m * n;
}
// ---------- y += A * x (ColMajor GEMV kernel, no conjugation) ----------
template <typename Scalar>
static void BM_Gemv(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  const Vec x = Vec::Random(cols);
  Vec y = Vec::Random(rows);
  for (auto _ : state) {
    y.noalias() += A * x;
    benchmark::DoNotOptimize(y.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] = benchmark::Counter(
      gemvFlops<Scalar>(rows, cols), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// ---------- y += A^T * x (RowMajor GEMV kernel, no conjugation) ----------
template <typename Scalar>
static void BM_GemvTrans(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  // Transposed product: x has A's row count, y has its column count.
  const Vec x = Vec::Random(rows);
  Vec y = Vec::Random(cols);
  for (auto _ : state) {
    y.noalias() += A.transpose() * x;
    benchmark::DoNotOptimize(y.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] = benchmark::Counter(
      gemvFlops<Scalar>(rows, cols), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// ---------- y += conj(A) * x (ColMajor kernel, ConjugateLhs=true) ----------
template <typename Scalar>
static void BM_GemvConj(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  const Vec x = Vec::Random(cols);
  Vec y = Vec::Random(rows);
  for (auto _ : state) {
    y.noalias() += A.conjugate() * x;
    benchmark::DoNotOptimize(y.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] = benchmark::Counter(
      gemvFlops<Scalar>(rows, cols), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// ---------- y += A^H * x (RowMajor kernel, ConjugateLhs=true) ----------
template <typename Scalar>
static void BM_GemvAdj(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  // Adjoint product: x has A's row count, y has its column count.
  const Vec x = Vec::Random(rows);
  Vec y = Vec::Random(cols);
  for (auto _ : state) {
    y.noalias() += A.adjoint() * x;
    benchmark::DoNotOptimize(y.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] = benchmark::Counter(
      gemvFlops<Scalar>(rows, cols), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// ---------- Size configurations ----------
// All sizes refer to the stored matrix A (m rows, n cols).
static void GemvSizes(::benchmark::Benchmark* b) {
  // Square matrices: exercises balanced kernel behavior.
  const int squares[] = {8, 16, 32, 64, 128, 256, 512, 1024, 4096};
  for (int side : squares) b->Args({side, side});
  const int narrow[] = {1, 4, 16, 64};
  const int wide[] = {256, 1024, 4096};
  // Tall-thin (m >> n): long vectorized row loop in the ColMajor kernel;
  // many rows with short dot products in the RowMajor (transpose) kernel.
  for (int n : narrow) {
    for (int m : wide) {
      if (m != n) b->Args({m, n});
    }
  }
  // Short-wide (m << n): the mirror image — long outer column loop with a
  // short vectorized row loop, or few rows with long dot products.
  for (int m : narrow) {
    for (int n : wide) {
      if (m != n) b->Args({m, n});
    }
  }
}
// ---------- Register benchmarks ----------
// Real types: Gemv and GemvTrans exercise the two kernel specializations.
// Conjugation is a no-op for real scalars.
BENCHMARK(BM_Gemv<float>)->Apply(GemvSizes)->Name("Gemv_float");
BENCHMARK(BM_Gemv<double>)->Apply(GemvSizes)->Name("Gemv_double");
BENCHMARK(BM_GemvTrans<float>)->Apply(GemvSizes)->Name("GemvTrans_float");
BENCHMARK(BM_GemvTrans<double>)->Apply(GemvSizes)->Name("GemvTrans_double");
// Complex types: all four variants exercise distinct kernel code paths.
BENCHMARK(BM_Gemv<std::complex<float>>)->Apply(GemvSizes)->Name("Gemv_cfloat");
BENCHMARK(BM_Gemv<std::complex<double>>)->Apply(GemvSizes)->Name("Gemv_cdouble");
BENCHMARK(BM_GemvTrans<std::complex<float>>)->Apply(GemvSizes)->Name("GemvTrans_cfloat");
BENCHMARK(BM_GemvTrans<std::complex<double>>)->Apply(GemvSizes)->Name("GemvTrans_cdouble");
BENCHMARK(BM_GemvConj<std::complex<float>>)->Apply(GemvSizes)->Name("GemvConj_cfloat");
BENCHMARK(BM_GemvConj<std::complex<double>>)->Apply(GemvSizes)->Name("GemvConj_cdouble");
BENCHMARK(BM_GemvAdj<std::complex<float>>)->Apply(GemvSizes)->Name("GemvAdj_cfloat");
BENCHMARK(BM_GemvAdj<std::complex<double>>)->Apply(GemvSizes)->Name("GemvAdj_cdouble");

View File

@@ -0,0 +1,43 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <Eigen/Geometry>
using namespace Eigen;
// Apply a 3D transform of the given Mode to a VSize-dimensional vector,
// feeding the result back so every iteration depends on the last.
template <typename Scalar, int Mode, int VSize>
static void BM_TransformVec(benchmark::State& state) {
  typedef Transform<Scalar, 3, Mode> Trans;
  typedef Matrix<Scalar, VSize, 1> Vec;
  Trans xform;
  xform.setIdentity();
  Vec vec;
  vec.setRandom();
  for (auto _ : state) {
    vec = xform * vec;
    benchmark::DoNotOptimize(vec.data());
  }
}
// Compose two 3D transforms of the given Mode, accumulating into the second.
template <typename Scalar, int Mode>
static void BM_TransformTransform(benchmark::State& state) {
  typedef Transform<Scalar, 3, Mode> Trans;
  Trans lhs;
  Trans rhs;
  lhs.setIdentity();
  rhs.setIdentity();
  for (auto _ : state) {
    rhs = Trans(lhs * rhs);
    benchmark::DoNotOptimize(rhs.data());
  }
}
BENCHMARK(BM_TransformVec<float, Isometry, 3>);
BENCHMARK(BM_TransformVec<float, Isometry, 4>);
BENCHMARK(BM_TransformVec<float, Projective, 4>);
BENCHMARK(BM_TransformVec<double, Isometry, 3>);
BENCHMARK(BM_TransformVec<double, Isometry, 4>);
BENCHMARK(BM_TransformVec<double, Projective, 4>);
BENCHMARK(BM_TransformTransform<float, Isometry>);
BENCHMARK(BM_TransformTransform<float, Projective>);
BENCHMARK(BM_TransformTransform<double, Isometry>);
BENCHMARK(BM_TransformTransform<double, Projective>);

View File

@@ -0,0 +1,28 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// Elementwise vector add a = a + b; throughput counts two reads and one
// write per element.
static void BM_VecAdd(benchmark::State& state) {
  const int size = state.range(0);
  VectorXf a = VectorXf::Random(size);
  const VectorXf b = VectorXf::Random(size);
  for (auto _ : state) {
    a = a + b;
    benchmark::DoNotOptimize(a.data());
  }
  state.SetBytesProcessed(state.iterations() * size * sizeof(float) * 3);
}
BENCHMARK(BM_VecAdd)->RangeMultiplier(4)->Range(64, 1 << 20);
// Elementwise matrix add a = a + b for n x n float matrices.
static void BM_MatAdd(benchmark::State& state) {
  const int n = state.range(0);
  MatrixXf a = MatrixXf::Random(n, n);
  const MatrixXf b = MatrixXf::Random(n, n);
  for (auto _ : state) {
    a = a + b;
    benchmark::DoNotOptimize(a.data());
  }
  state.SetBytesProcessed(state.iterations() * n * n * sizeof(float) * 3);
}
BENCHMARK(BM_MatAdd)->RangeMultiplier(2)->Range(8, 512);

View File

@@ -0,0 +1,41 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include "../test/MovableScalar.h"
#include <utility>
// Round-trip a matrix through a copy: copy-construct a temporary from m, then
// copy-assign it back. Baseline counterpart to move_matrix below.
template <typename MatrixType>
void copy_matrix(MatrixType& m) {
MatrixType tmp(m);
m = tmp;
}
// Round-trip a matrix through a move: move-construct a temporary from m, then
// move-assign it back, so m holds its original contents again on return.
// NOTE(review): with MatrixType deduced, `MatrixType&&` is a forwarding
// reference; callers here always pass an rvalue, so it binds as an rvalue ref.
template <typename MatrixType>
void move_matrix(MatrixType&& m) {
MatrixType tmp(std::move(m));
m = std::move(tmp);
}
// Measure the copy round-trip of a small (1x10) matrix whose element type is
// Eigen::MovableScalar<Scalar> (from ../test/MovableScalar.h — presumably a
// wrapper that makes element copies/moves observable; see that header).
template <typename Scalar>
static void BM_CopySemantics(benchmark::State& state) {
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
MatrixType data = MatrixType::Random().eval();
for (auto _ : state) {
copy_matrix(data);
benchmark::DoNotOptimize(data.data());
}
}
// Measure the move round-trip: data is moved into move_matrix, which moves it
// back before returning, so data is valid again for the next iteration.
template <typename Scalar>
static void BM_MoveSemantics(benchmark::State& state) {
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
MatrixType data = MatrixType::Random().eval();
for (auto _ : state) {
move_matrix(std::move(data));
benchmark::DoNotOptimize(data.data());
}
}
BENCHMARK(BM_CopySemantics<float>);
BENCHMARK(BM_MoveSemantics<float>);
BENCHMARK(BM_CopySemantics<double>);
BENCHMARK(BM_MoveSemantics<double>);

View File

@@ -0,0 +1,30 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// Materialize the full reverse of an n x n double matrix; throughput counts
// the bytes written to the destination.
static void BM_MatrixReverse(benchmark::State& state) {
  const int n = state.range(0);
  typedef Matrix<double, Dynamic, Dynamic> MatrixType;
  const MatrixType src = MatrixType::Random(n, n);
  MatrixType dst(n, n);
  for (auto _ : state) {
    dst = src.reverse();
    benchmark::DoNotOptimize(dst.data());
  }
  state.SetBytesProcessed(state.iterations() * n * n * sizeof(double));
}
BENCHMARK(BM_MatrixReverse)->RangeMultiplier(2)->Range(4, 512);
// Materialize the reverse of a length-n double vector.
static void BM_VectorReverse(benchmark::State& state) {
  const int n = state.range(0);
  typedef Matrix<double, Dynamic, 1> VectorType;
  const VectorType src = VectorType::Random(n);
  VectorType dst(n);
  for (auto _ : state) {
    dst = src.reverse();
    benchmark::DoNotOptimize(dst.data());
  }
  state.SetBytesProcessed(state.iterations() * n * sizeof(double));
}
BENCHMARK(BM_VectorReverse)->RangeMultiplier(4)->Range(16, 1 << 18);

103
benchmarks/bench_svd.cpp Normal file
View File

@@ -0,0 +1,103 @@
#include <benchmark/benchmark.h>
#include <Eigen/Dense>
using namespace Eigen;
// Benchmark JacobiSVD and BDCSVD for various scalar types, matrix shapes,
// and computation options.
// ---------- helpers ----------
template <typename Scalar>
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
// Out-of-line wrapper around SVD::compute; EIGEN_DONT_INLINE pins the call
// boundary so the decomposition itself is what gets timed, not code the
// optimizer could hoist or fold away.
template <typename SVD>
EIGEN_DONT_INLINE void do_compute(SVD& svd, const typename SVD::MatrixType& A) {
svd.compute(A);
}
// ---------- JacobiSVD ----------
template <typename Scalar, int Options>
static void BM_JacobiSVD(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat<Scalar> input = Mat<Scalar>::Random(rows, cols);
  // Pre-size the decomposition so workspace allocation stays out of the loop.
  JacobiSVD<Mat<Scalar>, Options> svd(rows, cols);
  for (auto _ : state) {
    do_compute(svd, input);
    benchmark::DoNotOptimize(svd.singularValues().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// ---------- BDCSVD ----------
template <typename Scalar, int Options>
static void BM_BDCSVD(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat<Scalar> input = Mat<Scalar>::Random(rows, cols);
  // Pre-size the decomposition so workspace allocation stays out of the loop.
  BDCSVD<Mat<Scalar>, Options> svd(rows, cols);
  for (auto _ : state) {
    do_compute(svd, input);
    benchmark::DoNotOptimize(svd.singularValues().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// ---------- Size configurations ----------
// Sizes suitable for JacobiSVD (O(n^2 p), expensive for large n).
static void JacobiSizes(::benchmark::Benchmark* b) {
  // Square
  const int squares[] = {4, 8, 16, 32, 64, 128, 256, 512};
  for (int side : squares) b->Args({side, side});
  // Tall-skinny
  b->Args({100, 4});
  b->Args({1000, 4});
  b->Args({1000, 10});
}
// Sizes suitable for BDCSVD (divide-and-conquer, faster for large matrices).
static void BDCSizes(::benchmark::Benchmark* b) {
  // Square
  const int squares[] = {4, 8, 16, 32, 64, 128, 256, 512, 1024};
  for (int side : squares) b->Args({side, side});
  // Tall-skinny (triggers R-bidiagonalization when aspect ratio > 4)
  b->Args({100, 4});
  b->Args({1000, 4});
  b->Args({1000, 10});
  b->Args({1000, 100});
  b->Args({10000, 10});
  b->Args({10000, 100});
}
// ---------- Register benchmarks ----------
// JacobiSVD — float
BENCHMARK(BM_JacobiSVD<float, ComputeThinU | ComputeThinV>)->Apply(JacobiSizes)->Name("JacobiSVD_float_ThinUV");
BENCHMARK(BM_JacobiSVD<float, 0>)->Apply(JacobiSizes)->Name("JacobiSVD_float_ValuesOnly");
// JacobiSVD — double
BENCHMARK(BM_JacobiSVD<double, ComputeThinU | ComputeThinV>)->Apply(JacobiSizes)->Name("JacobiSVD_double_ThinUV");
BENCHMARK(BM_JacobiSVD<double, 0>)->Apply(JacobiSizes)->Name("JacobiSVD_double_ValuesOnly");
// BDCSVD — float
BENCHMARK(BM_BDCSVD<float, ComputeThinU | ComputeThinV>)->Apply(BDCSizes)->Name("BDCSVD_float_ThinUV");
BENCHMARK(BM_BDCSVD<float, 0>)->Apply(BDCSizes)->Name("BDCSVD_float_ValuesOnly");
// BDCSVD — double
BENCHMARK(BM_BDCSVD<double, ComputeThinU | ComputeThinV>)->Apply(BDCSizes)->Name("BDCSVD_double_ThinUV");
BENCHMARK(BM_BDCSVD<double, 0>)->Apply(BDCSizes)->Name("BDCSVD_double_ValuesOnly");
// JacobiSVD — QR preconditioner comparison (double, 64x64, ThinUV)
BENCHMARK(BM_JacobiSVD<double, ComputeThinU | ComputeThinV | ColPivHouseholderQRPreconditioner>)
    ->Args({64, 64})
    ->Args({1000, 10})
    ->Name("JacobiSVD_double_ColPivQR");
BENCHMARK(BM_JacobiSVD<double, ComputeThinU | ComputeThinV | HouseholderQRPreconditioner>)
    ->Args({64, 64})
    ->Args({1000, 10})
    ->Name("JacobiSVD_double_HouseholderQR");
BENCHMARK(BM_JacobiSVD<double, ComputeFullU | ComputeFullV | FullPivHouseholderQRPreconditioner>)
    ->Args({64, 64})
    ->Name("JacobiSVD_double_FullPivQR");

99
benchmarks/bench_trsm.cpp Normal file
View File

@@ -0,0 +1,99 @@
#include <benchmark/benchmark.h>
#include <Eigen/Dense>
using namespace Eigen;
// ---------- TRSV: triangular solve with single RHS vector ----------
template <typename Scalar, unsigned int Mode>
static void BM_TRSV(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index n = state.range(0);
  Mat A = Mat::Random(n, n);
  // Make diagonally dominant to ensure well-conditioned triangular part.
  A.diagonal().array() += Scalar(n);
  const Vec b = Vec::Random(n);
  Vec x = b;
  for (auto _ : state) {
    x = b;  // restore the right-hand side before each in-place solve
    A.template triangularView<Mode>().solveInPlace(x);
    benchmark::DoNotOptimize(x.data());
  }
  state.SetItemsProcessed(state.iterations() * n * n);
}
// ---------- TRSM: triangular solve with multiple RHS (OnTheLeft) ----------
template <typename Scalar, unsigned int Mode>
static void BM_TRSM_Left(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index n = state.range(0);
  const Index nrhs = state.range(1);
  Mat A = Mat::Random(n, n);
  A.diagonal().array() += Scalar(n);  // diagonally dominant triangular part
  const Mat B = Mat::Random(n, nrhs);
  Mat X = B;
  for (auto _ : state) {
    X = B;  // restore the right-hand sides before each in-place solve
    A.template triangularView<Mode>().solveInPlace(X);
    benchmark::DoNotOptimize(X.data());
  }
  state.SetItemsProcessed(state.iterations() * n * n * nrhs);
}
// ---------- TRSM: triangular solve with multiple RHS (OnTheRight) ----------
template <typename Scalar, unsigned int Mode>
static void BM_TRSM_Right(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index n = state.range(0);
  const Index nrhs = state.range(1);
  Mat A = Mat::Random(n, n);
  A.diagonal().array() += Scalar(n);  // diagonally dominant triangular part
  const Mat B = Mat::Random(nrhs, n);  // right-side solve: RHS rows are the "nrhs" dim
  Mat X = B;
  for (auto _ : state) {
    X = B;  // restore the right-hand sides before each in-place solve
    A.template triangularView<Mode>().template solveInPlace<OnTheRight>(X);
    benchmark::DoNotOptimize(X.data());
  }
  state.SetItemsProcessed(state.iterations() * n * n * nrhs);
}
// ---------- Size configurations ----------
static void TrsvSizes(::benchmark::Benchmark* b) {
  const int orders[] = {32, 64, 128, 256, 512, 1024};
  for (int n : orders) b->Args({n});
}
static void TrsmSizes(::benchmark::Benchmark* b) {
  const int orders[] = {32, 64, 128, 256, 512, 1024};
  const int rhs_counts[] = {1, 4, 16, 64, 256};
  for (int n : orders) {
    for (int nrhs : rhs_counts) {
      b->Args({n, nrhs});
    }
  }
}
// ---------- TRSV benchmarks ----------
BENCHMARK(BM_TRSV<float, Lower>)->Apply(TrsvSizes)->Name("TRSV_float_Lower");
BENCHMARK(BM_TRSV<float, Upper>)->Apply(TrsvSizes)->Name("TRSV_float_Upper");
BENCHMARK(BM_TRSV<double, Lower>)->Apply(TrsvSizes)->Name("TRSV_double_Lower");
BENCHMARK(BM_TRSV<double, Upper>)->Apply(TrsvSizes)->Name("TRSV_double_Upper");
// ---------- TRSM Left benchmarks ----------
BENCHMARK(BM_TRSM_Left<float, Lower>)->Apply(TrsmSizes)->Name("TRSM_Left_float_Lower");
BENCHMARK(BM_TRSM_Left<float, Upper>)->Apply(TrsmSizes)->Name("TRSM_Left_float_Upper");
BENCHMARK(BM_TRSM_Left<double, Lower>)->Apply(TrsmSizes)->Name("TRSM_Left_double_Lower");
BENCHMARK(BM_TRSM_Left<double, Upper>)->Apply(TrsmSizes)->Name("TRSM_Left_double_Upper");
// ---------- TRSM Right benchmarks ----------
BENCHMARK(BM_TRSM_Right<float, Lower>)->Apply(TrsmSizes)->Name("TRSM_Right_float_Lower");
BENCHMARK(BM_TRSM_Right<float, Upper>)->Apply(TrsmSizes)->Name("TRSM_Right_float_Upper");
BENCHMARK(BM_TRSM_Right<double, Lower>)->Apply(TrsmSizes)->Name("TRSM_Right_double_Lower");
BENCHMARK(BM_TRSM_Right<double, Upper>)->Apply(TrsmSizes)->Name("TRSM_Right_double_Upper");

View File

@@ -0,0 +1,123 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <Eigen/Dense>
#include <Eigen/Eigenvalues>
using namespace Eigen;
static void BM_VectorExp(benchmark::State& state) {
int n = state.range(0);
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
VectorXd result(n);
for (auto _ : state) {
result = v.array().exp();
benchmark::DoNotOptimize(result.data());
}
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
}
static void BM_VectorSin(benchmark::State& state) {
int n = state.range(0);
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
VectorXd result(n);
for (auto _ : state) {
result = v.array().sin();
benchmark::DoNotOptimize(result.data());
}
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
}
static void BM_VectorCos(benchmark::State& state) {
int n = state.range(0);
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
VectorXd result(n);
for (auto _ : state) {
result = v.array().cos();
benchmark::DoNotOptimize(result.data());
}
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
}
static void BM_VectorSqrt(benchmark::State& state) {
int n = state.range(0);
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
VectorXd result(n);
for (auto _ : state) {
result = v.array().sqrt();
benchmark::DoNotOptimize(result.data());
}
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
}
static void BM_VectorLog(benchmark::State& state) {
int n = state.range(0);
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
VectorXd result(n);
for (auto _ : state) {
result = v.array().log();
benchmark::DoNotOptimize(result.data());
}
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
}
static void BM_VectorTanh(benchmark::State& state) {
int n = state.range(0);
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
VectorXd result(n);
for (auto _ : state) {
result = v.array().tanh();
benchmark::DoNotOptimize(result.data());
}
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
}
// Vector lengths from 1e4 to 5e6 elements.
static void VectorSizes(::benchmark::Benchmark* b) {
  const int lengths[] = {10000, 100000, 1000000, 5000000};
  for (int n : lengths) b->Arg(n);
}
BENCHMARK(BM_VectorExp)->Apply(VectorSizes);
BENCHMARK(BM_VectorSin)->Apply(VectorSizes);
BENCHMARK(BM_VectorCos)->Apply(VectorSizes);
BENCHMARK(BM_VectorSqrt)->Apply(VectorSizes);
BENCHMARK(BM_VectorLog)->Apply(VectorSizes);
BENCHMARK(BM_VectorTanh)->Apply(VectorSizes);
// Square double-precision matrix product C = A * B.
static void BM_DGEMM(benchmark::State& state) {
  const int n = state.range(0);
  const MatrixXd A = MatrixXd::Random(n, n);
  const MatrixXd B = MatrixXd::Random(n, n);
  MatrixXd C(n, n);
  for (auto _ : state) {
    C.noalias() = A * B;
    benchmark::DoNotOptimize(C.data());
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
BENCHMARK(BM_DGEMM)->Arg(256)->Arg(512)->Arg(1024)->Arg(2048);
// Symmetric eigendecomposition of a symmetrized random matrix; the solver is
// constructed once so only compute() is in the timed loop.
static void BM_EigenDecomposition(benchmark::State& state) {
  const int n = state.range(0);
  MatrixXd M = MatrixXd::Random(n, n);
  M = (M + M.transpose()) * 0.5;  // symmetrize to satisfy the solver's input contract
  SelfAdjointEigenSolver<MatrixXd> solver;
  for (auto _ : state) {
    solver.compute(M);
    benchmark::DoNotOptimize(solver.eigenvalues().data());
  }
}
BENCHMARK(BM_EigenDecomposition)->Arg(256)->Arg(512)->Arg(1024);
// End-to-end workload: sample covariance of a returns matrix followed by a
// symmetric eigendecomposition, both inside the timed loop.
static void BM_FSI_Risk(benchmark::State& state) {
  const int numPeriods = state.range(0);
  const int numAssets = state.range(1);
  const MatrixXd returns = MatrixXd::Random(numPeriods, numAssets);
  for (auto _ : state) {
    const MatrixXd cov = (returns.transpose() * returns) / (numPeriods - 1);
    SelfAdjointEigenSolver<MatrixXd> solver(cov);
    benchmark::DoNotOptimize(solver.eigenvalues().data());
  }
}
BENCHMARK(BM_FSI_Risk)->Args({10000, 500});

View File

@@ -0,0 +1,76 @@
#include <benchmark/benchmark.h>
#include <cstdint>
bool eigen_use_specific_block_size;
int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n;
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
#include <Eigen/Core>
using namespace Eigen;
typedef MatrixXf MatrixType;
typedef MatrixType::Scalar Scalar;
// GEMM using Eigen's default cache-blocking heuristics. Args: {k, m, n}.
static void BM_GemmDefaultBlocking(benchmark::State& state) {
  const int k = state.range(0), m = state.range(1), n = state.range(2);
  eigen_use_specific_block_size = false;
  const MatrixType lhs = MatrixType::Random(m, k);
  const MatrixType rhs = MatrixType::Random(k, n);
  MatrixType dst = MatrixType::Zero(m, n);
  for (auto _ : state) {
    dst.noalias() = lhs * rhs;
    benchmark::DoNotOptimize(dst.data());
  }
  // 2*k*m*n flops per product, reported as a rate.
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * k * m * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// GEMM with explicitly overridden blocking sizes.
// Args: {k, m, n, block_k, block_m, block_n}.
static void BM_GemmCustomBlocking(benchmark::State& state) {
  const int k = state.range(0), m = state.range(1), n = state.range(2);
  eigen_use_specific_block_size = true;
  eigen_block_size_k = state.range(3);
  eigen_block_size_m = state.range(4);
  eigen_block_size_n = state.range(5);
  const MatrixType lhs = MatrixType::Random(m, k);
  const MatrixType rhs = MatrixType::Random(k, n);
  MatrixType dst = MatrixType::Zero(m, n);
  for (auto _ : state) {
    dst.noalias() = lhs * rhs;
    benchmark::DoNotOptimize(dst.data());
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * k * m * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// Square product sizes exercising the default blocking heuristics.
static void DefaultBlockingSizes(::benchmark::Benchmark* b) {
  static const int kSizes[] = {64, 128, 256, 512, 1024, 2048};
  for (int s : kSizes) b->Args({s, s, s});
}
// A few square product sizes crossed with a sweep of (kc, mc) block sizes;
// the nc block is kept at the full product size.
static void CustomBlockingSizes(::benchmark::Benchmark* b) {
  static const int kBlocks[] = {16, 32, 64, 128, 256};
  for (int s : {256, 512, 1024}) {
    for (int bk : kBlocks) {
      if (bk > s) continue;
      for (int bm : kBlocks) {
        if (bm <= s) b->Args({s, s, s, bk, bm, s});
      }
    }
  }
}
// Register both GEMM variants with their respective argument generators.
BENCHMARK(BM_GemmDefaultBlocking)->Apply(DefaultBlockingSizes);
BENCHMARK(BM_GemmCustomBlocking)->Apply(CustomBlockingSizes);

View File

@@ -0,0 +1,167 @@
#include <benchmark/benchmark.h>
#include <Eigen/Dense>
using namespace Eigen;
typedef float Scalar;
typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
// Factor A, routing rectangular inputs through the normal equations
// (A^T * A) so that solvers which require a square input still apply;
// square inputs are factored directly.
template <typename Solver>
EIGEN_DONT_INLINE void compute_norm_equation(Solver& solver, const Mat& A) {
  if (A.rows() != A.cols())
    solver.compute(A.transpose() * A);
  else
    solver.compute(A);
}
// Factor A directly (for decompositions that accept rectangular input).
template <typename Solver>
EIGEN_DONT_INLINE void compute(Solver& solver, const Mat& A) {
  solver.compute(A);
}
// Cholesky (LLT). Square inputs are made SPD; rectangular inputs are reduced
// through the normal equations by compute_norm_equation().
static void BM_LLT(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  Mat A = Mat::Random(rows, cols);
  if (rows == cols) A = A * A.adjoint();  // make the square case SPD
  LLT<Mat> solver(cols);
  for (auto _ : state) {
    compute_norm_equation(solver, A);
    benchmark::DoNotOptimize(solver.matrixLLT().data());
  }
}
// Robust Cholesky with pivoting (LDLT); same input setup as BM_LLT.
static void BM_LDLT(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  Mat A = Mat::Random(rows, cols);
  if (rows == cols) A = A * A.adjoint();
  LDLT<Mat> solver(cols);
  for (auto _ : state) {
    compute_norm_equation(solver, A);
    benchmark::DoNotOptimize(solver.matrixLDLT().data());
  }
}
// LU with partial (row) pivoting; same input setup as BM_LLT.
static void BM_PartialPivLU(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  Mat A = Mat::Random(rows, cols);
  if (rows == cols) A = A * A.adjoint();
  PartialPivLU<Mat> solver(cols);
  for (auto _ : state) {
    compute_norm_equation(solver, A);
    benchmark::DoNotOptimize(solver.matrixLU().data());
  }
}
// LU with full pivoting; same input setup as BM_LLT.
static void BM_FullPivLU(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  Mat A = Mat::Random(rows, cols);
  if (rows == cols) A = A * A.adjoint();
  FullPivLU<Mat> solver(cols, cols);
  for (auto _ : state) {
    compute_norm_equation(solver, A);
    benchmark::DoNotOptimize(solver.matrixLU().data());
  }
}
// Plain Householder QR (no pivoting) on a random rows x cols matrix.
static void BM_HouseholderQR(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  HouseholderQR<Mat> solver(rows, cols);
  for (auto _ : state) {
    compute(solver, A);
    benchmark::DoNotOptimize(solver.matrixQR().data());
  }
}
// Householder QR with column pivoting (rank-revealing).
static void BM_ColPivHouseholderQR(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  ColPivHouseholderQR<Mat> solver(rows, cols);
  for (auto _ : state) {
    compute(solver, A);
    benchmark::DoNotOptimize(solver.matrixQR().data());
  }
}
// Complete orthogonal decomposition (A = Q T Z).
static void BM_COD(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  CompleteOrthogonalDecomposition<Mat> solver(rows, cols);
  for (auto _ : state) {
    compute(solver, A);
    benchmark::DoNotOptimize(solver.matrixQTZ().data());
  }
}
// Householder QR with full pivoting.
static void BM_FullPivHouseholderQR(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  FullPivHouseholderQR<Mat> solver(rows, cols);
  for (auto _ : state) {
    compute(solver, A);
    benchmark::DoNotOptimize(solver.matrixQR().data());
  }
}
// Two-sided Jacobi SVD with thin U/V.
static void BM_JacobiSVD(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  JacobiSVD<Mat, ComputeThinU | ComputeThinV> solver(rows, cols);
  for (auto _ : state) {
    solver.compute(A);
    benchmark::DoNotOptimize(solver.singularValues().data());
  }
}
// Bidiagonal divide-and-conquer SVD with thin U/V.
static void BM_BDCSVD(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const Mat A = Mat::Random(rows, cols);
  BDCSVD<Mat, ComputeThinU | ComputeThinV> solver(rows, cols);
  for (auto _ : state) {
    solver.compute(A);
    benchmark::DoNotOptimize(solver.singularValues().data());
  }
}
// Shared size list: a few square problems plus two tall-skinny ones.
static void DenseSolverSizes(::benchmark::Benchmark* b) {
  static const int kSquare[] = {8, 100, 1000};
  for (int s : kSquare) b->Args({s, s});
  b->Args({10000, 8});
  b->Args({10000, 100});
}
// Register every dense decomposition over the shared size list.
BENCHMARK(BM_LLT)->Apply(DenseSolverSizes);
BENCHMARK(BM_LDLT)->Apply(DenseSolverSizes);
BENCHMARK(BM_PartialPivLU)->Apply(DenseSolverSizes);
BENCHMARK(BM_FullPivLU)->Apply(DenseSolverSizes);
BENCHMARK(BM_HouseholderQR)->Apply(DenseSolverSizes);
BENCHMARK(BM_ColPivHouseholderQR)->Apply(DenseSolverSizes);
BENCHMARK(BM_COD)->Apply(DenseSolverSizes);
BENCHMARK(BM_FullPivHouseholderQR)->Apply(DenseSolverSizes);
// JacobiSVD gets a reduced size list of its own.
BENCHMARK(BM_JacobiSVD)->Apply([](::benchmark::Benchmark* b) {
  // JacobiSVD is very slow for large matrices
  for (int s : {8, 100}) b->Args({s, s});
  b->Args({10000, 8});
});
BENCHMARK(BM_BDCSVD)->Apply(DenseSolverSizes);

97
benchmarks/eig33.cpp Normal file
View File

@@ -0,0 +1,97 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <Eigen/Eigenvalues>
#include <Eigen/Geometry>
using namespace Eigen;
// Closed-form computation of the three real roots of the characteristic
// polynomial of the symmetric 3x3 matrix m (its eigenvalues), using the
// trigonometric solution of the depressed cubic. Roots are written in
// ascending order: roots(0) <= roots(1) <= roots(2).
template <typename Matrix, typename Roots>
inline void computeRoots(const Matrix& m, Roots& roots) {
  typedef typename Matrix::Scalar Scalar;
  const Scalar s_inv3 = 1.0 / 3.0;
  const Scalar s_sqrt3 = std::sqrt(Scalar(3.0));
  // Characteristic polynomial x^3 - c2 x^2 + c1 x - c0 of a symmetric m:
  // c0 = det(m), c1 = sum of principal 2x2 minors, c2 = trace(m).
  Scalar c0 = m(0, 0) * m(1, 1) * m(2, 2) + Scalar(2) * m(0, 1) * m(0, 2) * m(1, 2) - m(0, 0) * m(1, 2) * m(1, 2) -
              m(1, 1) * m(0, 2) * m(0, 2) - m(2, 2) * m(0, 1) * m(0, 1);
  Scalar c1 = m(0, 0) * m(1, 1) - m(0, 1) * m(0, 1) + m(0, 0) * m(2, 2) - m(0, 2) * m(0, 2) + m(1, 1) * m(2, 2) -
              m(1, 2) * m(1, 2);
  Scalar c2 = m(0, 0) + m(1, 1) + m(2, 2);
  Scalar c2_over_3 = c2 * s_inv3;
  Scalar a_over_3 = (c1 - c2 * c2_over_3) * s_inv3;
  // A symmetric matrix has three real roots, so a_over_3 and q are
  // mathematically non-positive; clamp to guard against rounding error.
  if (a_over_3 > Scalar(0)) a_over_3 = Scalar(0);
  Scalar half_b = Scalar(0.5) * (c0 + c2_over_3 * (Scalar(2) * c2_over_3 * c2_over_3 - c1));
  Scalar q = half_b * half_b + a_over_3 * a_over_3 * a_over_3;
  if (q > Scalar(0)) q = Scalar(0);
  // theta lies in [0, pi/3], which fixes the ordering of the three roots.
  Scalar rho = std::sqrt(-a_over_3);
  Scalar theta = std::atan2(std::sqrt(-q), half_b) * s_inv3;
  Scalar cos_theta = std::cos(theta);
  Scalar sin_theta = std::sin(theta);
  roots(2) = c2_over_3 + Scalar(2) * rho * cos_theta;
  roots(0) = c2_over_3 - rho * (cos_theta + s_sqrt3 * sin_theta);
  roots(1) = c2_over_3 - rho * (cos_theta - s_sqrt3 * sin_theta);
}
// Closed-form eigendecomposition of the symmetric 3x3 matrix mat: eigenvalues
// go to evals (ascending, per computeRoots) and unit eigenvectors to the
// columns of evecs.
template <typename Matrix, typename Vector>
void eigen33(const Matrix& mat, Matrix& evecs, Vector& evals) {
  typedef typename Matrix::Scalar Scalar;
  // Shift by trace/3 and scale by the largest coefficient before computing
  // roots, to reduce the impact of rounding on the cubic solve.
  Scalar shift = mat.trace() / 3;
  Matrix scaledMat = mat;
  scaledMat.diagonal().array() -= shift;
  Scalar scale = scaledMat.cwiseAbs().maxCoeff();
  scale = std::max(scale, Scalar(1));
  scaledMat /= scale;
  computeRoots(scaledMat, evals);
  // Numerically degenerate spectrum: any orthonormal basis is valid.
  if ((evals(2) - evals(0)) <= Eigen::NumTraits<Scalar>::epsilon()) {
    evecs.setIdentity();
  } else {
    // Eigenvector of the largest eigenvalue: a null-space vector of
    // (scaledMat - evals(2) I), obtained as the cross product of two rows.
    Matrix tmp;
    tmp = scaledMat;
    tmp.diagonal().array() -= evals(2);
    evecs.col(2) = tmp.row(0).cross(tmp.row(1)).normalized();
    tmp = scaledMat;
    tmp.diagonal().array() -= evals(1);
    evecs.col(1) = tmp.row(0).cross(tmp.row(1));
    Scalar n1 = evecs.col(1).norm();
    // If the cross product is numerically zero (near-repeated eigenvalue),
    // fall back to an arbitrary direction orthogonal to col(2).
    if (n1 <= Eigen::NumTraits<Scalar>::epsilon())
      evecs.col(1) = evecs.col(2).unitOrthogonal();
    else
      evecs.col(1) /= n1;
    // Re-orthogonalize col(1) against col(2), then complete the basis.
    evecs.col(1) = evecs.col(2).cross(evecs.col(1).cross(evecs.col(2))).normalized();
    evecs.col(0) = evecs.col(2).cross(evecs.col(1));
  }
  // Undo the scale and shift applied above.
  evals *= scale;
  evals.array() += shift;
}
// Baseline: Eigen's iterative self-adjoint solver on a fixed 3x3 SPD matrix.
static void BM_Eig33_Iterative(benchmark::State& state) {
  Matrix3d mat = Matrix3d::Random();
  mat = mat.adjoint() * mat;
  SelfAdjointEigenSolver<Matrix3d> solver(mat);
  for (auto _ : state) {
    solver.compute(mat);
    benchmark::DoNotOptimize(solver.eigenvalues().data());
  }
}
BENCHMARK(BM_Eig33_Iterative);
// Eigen's built-in closed-form 3x3 path (computeDirect).
static void BM_Eig33_Direct(benchmark::State& state) {
  Matrix3d mat = Matrix3d::Random();
  mat = mat.adjoint() * mat;
  SelfAdjointEigenSolver<Matrix3d> solver(mat);
  for (auto _ : state) {
    solver.computeDirect(mat);
    benchmark::DoNotOptimize(solver.eigenvalues().data());
  }
}
BENCHMARK(BM_Eig33_Direct);
// The hand-written closed-form implementation (eigen33) defined in this file.
static void BM_Eig33_Custom(benchmark::State& state) {
  Matrix3d mat = Matrix3d::Random();
  mat = mat.adjoint() * mat;
  Matrix3d vecs;
  Vector3d vals;
  for (auto _ : state) {
    eigen33(mat, vecs, vals);
    benchmark::DoNotOptimize(vals.data());
  }
}
BENCHMARK(BM_Eig33_Custom);

71
benchmarks/geometry.cpp Normal file
View File

@@ -0,0 +1,71 @@
#include <benchmark/benchmark.h>
#include <Eigen/Geometry>
using namespace Eigen;
// Helper to get dimension from various transform types.
// Primary template: Eigen::Transform types expose their dimension as T::Dim.
template <typename T>
struct TransformDim {
  static constexpr int value = T::Dim;
};
// Specialization for plain matrices, where the dimension is the row count.
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
struct TransformDim<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> {
  static constexpr int value = Rows;
};
// Applies a transformation to a Dim x N block of column vectors, in place.
// Transformation is either a plain Dim x Dim matrix or one of Eigen's
// Transform specializations; the if-constexpr below selects the setup path.
template <typename Transformation, int N>
static void BM_TransformData(benchmark::State& state) {
  typedef typename Transformation::Scalar Scalar;
  constexpr int Dim = TransformDim<Transformation>::value;
  Transformation t;
  if constexpr (std::is_same_v<Transformation, Matrix<Scalar, Dim, Dim>>) {
    // Plain matrix: fill directly with random coefficients.
    t.setRandom();
  } else {
    // Transform types: construct from a random Dim x (Dim+1) affine matrix.
    Matrix<Scalar, Dim, Dim + 1> mat;
    mat.setRandom();
    t = Transformation(mat);
  }
  Matrix<Scalar, Dim, N> data;
  data.setRandom();
  for (auto _ : state) {
    data = t * data;  // repeatedly transform (and overwrite) the same block
    benchmark::DoNotOptimize(data.data());
  }
}
// Quaternion rotation applied to N column vectors one column at a time
// (quaternions have no matrix-times-block product, hence the inner loop).
template <typename Scalar, int N>
static void BM_QuatTransform(benchmark::State& state) {
  Quaternion<Scalar> rot;
  rot.setIdentity();
  Matrix<Scalar, 3, N> pts;
  pts.setRandom();
  for (auto _ : state) {
    for (int c = 0; c < N; ++c) pts.col(c) = rot * pts.col(c);
    benchmark::DoNotOptimize(pts.data());
  }
}
// Use typedefs to avoid commas in macro arguments
typedef Transform<float, 3, Isometry> Isometry3f_t;
typedef Transform<float, 3, Affine> Affine3f_t;
typedef Transform<float, 3, AffineCompact> AffineCompact3f_t;
typedef Matrix<float, 3, 3> Matrix3f_t;
// Each transform type is benchmarked applying to 1, 4 and 8 column vectors.
BENCHMARK(BM_TransformData<Isometry3f_t, 1>);
BENCHMARK(BM_TransformData<Isometry3f_t, 4>);
BENCHMARK(BM_TransformData<Isometry3f_t, 8>);
BENCHMARK(BM_TransformData<Affine3f_t, 1>);
BENCHMARK(BM_TransformData<Affine3f_t, 4>);
BENCHMARK(BM_TransformData<Affine3f_t, 8>);
BENCHMARK(BM_TransformData<AffineCompact3f_t, 1>);
BENCHMARK(BM_TransformData<AffineCompact3f_t, 4>);
BENCHMARK(BM_TransformData<AffineCompact3f_t, 8>);
BENCHMARK(BM_TransformData<Matrix3f_t, 1>);
BENCHMARK(BM_TransformData<Matrix3f_t, 4>);
BENCHMARK(BM_TransformData<Matrix3f_t, 8>);
// Per-column quaternion rotation, same vector counts.
BENCHMARK(BM_QuatTransform<float, 1>);
BENCHMARK(BM_QuatTransform<float, 4>);
BENCHMARK(BM_QuatTransform<float, 8>);

View File

@@ -0,0 +1,51 @@
#include <Eigen/Core>
#include <cstdio>
using namespace Eigen;
using namespace Eigen::internal;
// Diagnostic tool: print Eigen's GEMM register-blocking traits, detected
// cache sizes, and the cache-blocking sizes (kc, mc, nc) chosen for a range
// of product shapes, for float and double.
int main() {
  // Register-level blocking parameters of the gebp (packed GEMM) kernel.
  {
    using Traits = gebp_traits<float, float>;
    printf("Float traits: mr=%d, nr=%d, LhsProgress=%d, RhsProgress=%d, NumberOfRegisters=%d\n", Traits::mr, Traits::nr,
           Traits::LhsProgress, Traits::RhsProgress, Traits::NumberOfRegisters);
  }
  {
    using Traits = gebp_traits<double, double>;
    printf("Double traits: mr=%d, nr=%d, LhsProgress=%d, RhsProgress=%d, NumberOfRegisters=%d\n", Traits::mr,
           Traits::nr, Traits::LhsProgress, Traits::RhsProgress, Traits::NumberOfRegisters);
  }
  // Cache sizes used by the blocking heuristics.
  std::ptrdiff_t l1, l2, l3;
  manage_caching_sizes(GetAction, &l1, &l2, &l3);
  printf("Cache sizes: L1=%ld, L2=%ld, L3=%ld\n", (long)l1, (long)l2, (long)l3);
  // Blocking-size table. The header is printed immediately before the rows
  // and lists exactly the nine columns each row produces.
  printf("%-8s %-8s %-8s | %-8s %-8s %-8s | %-8s %-8s %-8s\n", "m", "n", "k", "kc_f", "mc_f", "nc_f", "kc_d", "mc_d",
         "nc_d");
  // Square shapes.
  for (int size : {8, 16, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 384, 448, 512, 768, 1024, 1536, 2048}) {
    Index kf = size, mf = size, nf = size;
    computeProductBlockingSizes<float, float>(kf, mf, nf);
    Index kd = size, md = size, nd = size;
    computeProductBlockingSizes<double, double>(kd, md, nd);
    printf("%-8d %-8d %-8d | %-8ld %-8ld %-8ld | %-8ld %-8ld %-8ld\n", size, size, size, (long)kf, (long)mf, (long)nf,
           (long)kd, (long)md, (long)nd);
  }
  // Non-square shapes.
  for (auto [m, n, k] :
       std::initializer_list<std::tuple<int, int, int>>{{64, 64, 1024}, {1024, 64, 64}, {64, 1024, 64}}) {
    Index kf = k, mf = m, nf = n;
    computeProductBlockingSizes<float, float>(kf, mf, nf);
    Index kd = k, md = m, nd = n;
    computeProductBlockingSizes<double, double>(kd, md, nd);
    printf("%-8d %-8d %-8d | %-8ld %-8ld %-8ld | %-8ld %-8ld %-8ld\n", m, n, k, (long)kf, (long)mf, (long)nf, (long)kd,
           (long)md, (long)nd);
  }
  return 0;
}

38
benchmarks/quatmul.cpp Normal file
View File

@@ -0,0 +1,38 @@
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <Eigen/Geometry>
using namespace Eigen;
// Quaternion product through Eigen's default operator* path.
template <typename Quat>
EIGEN_DONT_INLINE void quatmul_default(const Quat& a, const Quat& b, Quat& c) {
  c = a * b;
}
// Quaternion product forced through internal::quat_product with architecture
// tag 0 — NOTE(review): assumed to select the generic (non-SIMD) kernel;
// confirm against Eigen's internal::quat_product specializations.
template <typename Quat>
EIGEN_DONT_INLINE void quatmul_novec(const Quat& a, const Quat& b, Quat& c) {
  c = internal::quat_product<0, Quat, Quat, typename Quat::Scalar>::run(a, b);
}
// Time the default quaternion product.
template <typename Quat>
static void BM_QuatMul_Default(benchmark::State& state) {
  const Quat lhs(4, 1, 2, 3);
  const Quat rhs(2, 3, 4, 5);
  Quat out;
  for (auto _ : state) {
    quatmul_default(lhs, rhs, out);
    benchmark::DoNotOptimize(out.coeffs().data());
  }
}
// Time the non-vectorized quaternion product.
template <typename Quat>
static void BM_QuatMul_NoVec(benchmark::State& state) {
  const Quat lhs(4, 1, 2, 3);
  const Quat rhs(2, 3, 4, 5);
  Quat out;
  for (auto _ : state) {
    quatmul_novec(lhs, rhs, out);
    benchmark::DoNotOptimize(out.coeffs().data());
  }
}
// Register both product variants for float and double quaternions.
BENCHMARK(BM_QuatMul_Default<Quaternionf>);
BENCHMARK(BM_QuatMul_NoVec<Quaternionf>);
BENCHMARK(BM_QuatMul_Default<Quaterniond>);
BENCHMARK(BM_QuatMul_NoVec<Quaterniond>);

View File

@@ -0,0 +1,64 @@
#include <benchmark/benchmark.h>
#include <Eigen/Sparse>
#include <set>
using namespace Eigen;
typedef double Scalar;
typedef SparseMatrix<Scalar> SpMat;
typedef Matrix<Scalar, Dynamic, 1> DenseVec;
// Build a rows x cols sparse matrix with exactly nnzPerCol random entries per
// column (distinct random rows, random values), then compress it.
static void fillMatrix(int nnzPerCol, int rows, int cols, SpMat& dst) {
  dst.resize(rows, cols);
  dst.reserve(VectorXi::Constant(cols, nnzPerCol));
  for (int col = 0; col < cols; ++col) {
    std::set<int> taken;
    while (static_cast<int>(taken.size()) < nnzPerCol) {
      const int row = internal::random<int>(0, rows - 1);
      // Only insert when this row has not been used yet in this column.
      if (taken.insert(row).second) dst.insert(row, col) = internal::random<Scalar>();
    }
  }
  dst.makeCompressed();
}
// Sparse matrix times dense vector. Args: {dimension, nonzeros per column}.
static void BM_SpMV(benchmark::State& state) {
  const int dim = state.range(0);
  const int nnzPerCol = state.range(1);
  SpMat mat(dim, dim);
  fillMatrix(nnzPerCol, dim, dim, mat);
  const DenseVec x = DenseVec::Random(dim);
  DenseVec y(dim);
  for (auto _ : state) {
    y.noalias() = mat * x;
    benchmark::DoNotOptimize(y.data());
  }
  state.counters["nnz"] = mat.nonZeros();
}
// Transposed sparse matrix times dense vector.
static void BM_SpMV_Transpose(benchmark::State& state) {
  const int dim = state.range(0);
  const int nnzPerCol = state.range(1);
  SpMat mat(dim, dim);
  fillMatrix(nnzPerCol, dim, dim, mat);
  const DenseVec x = DenseVec::Random(dim);
  DenseVec y(dim);
  for (auto _ : state) {
    y.noalias() = mat.transpose() * x;
    benchmark::DoNotOptimize(y.data());
  }
  state.counters["nnz"] = mat.nonZeros();
}
// Args: {matrix dimension, nonzeros per column}.
static void SpMVSizes(::benchmark::Benchmark* b) {
  static const int kNnz[] = {7, 20, 50};
  for (int n : {1000, 10000, 100000})
    for (int nnz : kNnz) b->Args({n, nnz});
}
BENCHMARK(BM_SpMV)->Apply(SpMVSizes);
BENCHMARK(BM_SpMV_Transpose)->Apply(SpMVSizes);

View File

@@ -0,0 +1,49 @@
#include <benchmark/benchmark.h>
#include <Eigen/Sparse>
#include <set>
using namespace Eigen;
typedef double Scalar;
typedef SparseMatrix<Scalar> SpMat;
// Build a rows x cols sparse matrix with exactly nnzPerCol entries per column
// at distinct random rows, then compress the storage.
static void fillMatrix(int nnzPerCol, int rows, int cols, SpMat& dst) {
  dst.resize(rows, cols);
  // Per-column reservation keeps insert() cheap.
  dst.reserve(VectorXi::Constant(cols, nnzPerCol));
  for (int j = 0; j < cols; ++j) {
    std::set<int> used;  // rows already populated in this column
    for (int i = 0; i < nnzPerCol; ++i) {
      int row;
      // Rejection-sample until an unused row is drawn.
      do {
        row = internal::random<int>(0, rows - 1);
      } while (used.count(row));
      used.insert(row);
      dst.insert(row, j) = internal::random<Scalar>();
    }
  }
  dst.makeCompressed();
}
// Sparse * sparse matrix product. Args: {dimension, nonzeros per column}.
static void BM_SparseMM(benchmark::State& state) {
  const int dim = state.range(0);
  const int nnzPerCol = state.range(1);
  SpMat lhs(dim, dim), rhs(dim, dim), prod(dim, dim);
  fillMatrix(nnzPerCol, dim, dim, lhs);
  fillMatrix(nnzPerCol, dim, dim, rhs);
  for (auto _ : state) {
    prod = lhs * rhs;
    benchmark::DoNotOptimize(prod.valuePtr());
  }
  state.counters["nnz_A"] = lhs.nonZeros();
  state.counters["nnz_B"] = rhs.nonZeros();
}
// Args: {matrix dimension, nonzeros per column}.
static void SpMMSizes(::benchmark::Benchmark* b) {
  static const int kNnz[] = {4, 6, 10};
  for (int n : {1000, 10000})
    for (int nnz : kNnz) b->Args({n, nnz});
}
BENCHMARK(BM_SparseMM)->Apply(SpMMSizes);

View File

@@ -0,0 +1,45 @@
#include <benchmark/benchmark.h>
#include <Eigen/Sparse>
#include <set>
using namespace Eigen;
typedef double Scalar;
typedef SparseMatrix<Scalar> SpMat;
// Populate dst as a rows x cols sparse matrix where each coefficient is
// nonzero with the given probability, then compress the storage.
static void fillMatrix(float density, int rows, int cols, SpMat& dst) {
  dst.resize(rows, cols);
  dst.reserve(static_cast<int>(rows * cols * density));  // expected nnz
  for (int col = 0; col < cols; ++col)
    for (int row = 0; row < rows; ++row)
      if (internal::random<float>(0, 1) < density) dst.insert(row, col) = internal::random<Scalar>();
  dst.makeCompressed();
}
// Assign the transpose of a sparse matrix into another sparse matrix.
static void BM_SparseTranspose(benchmark::State& state) {
  const int dim = state.range(0);
  const float density = state.range(1) / 10000.0f;  // second arg is density * 10000
  SpMat src(dim, dim), dst(dim, dim);
  fillMatrix(density, dim, dim, src);
  for (auto _ : state) {
    dst = src.transpose();
    benchmark::DoNotOptimize(dst.valuePtr());
  }
  state.counters["nnz"] = src.nonZeros();
  state.counters["density%"] = density * 100;
}
// Args: {dimension, density * 10000} -> densities of 1%, 0.5%, 0.1%, 0.04%.
static void TransposeSizes(::benchmark::Benchmark* b) {
  static const int kDensities[] = {100, 50, 10, 4};
  for (int n : {1000, 10000})
    for (int d : kDensities) b->Args({n, d});
}
BENCHMARK(BM_SparseTranspose)->Apply(TransposeSizes);