Add new benchmarks for Core, LU, and QR operations

libeigen/eigen!2177

Closes #3035

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
Rasmus Munk Larsen
2026-02-22 12:19:37 -08:00
parent 8c35441f18
commit 1f49bf96cf
15 changed files with 1355 additions and 2 deletions

View File

@@ -5,3 +5,13 @@ eigen_add_benchmark(bench_vecadd bench_vecadd.cpp)
# Register each micro-benchmark as its own executable (one .cpp per target).
eigen_add_benchmark(bench_trsm bench_trsm.cpp)
eigen_add_benchmark(bench_reverse bench_reverse.cpp)
eigen_add_benchmark(bench_move_semantics bench_move_semantics.cpp)
eigen_add_benchmark(bench_reductions bench_reductions.cpp)
eigen_add_benchmark(bench_dot bench_dot.cpp)
eigen_add_benchmark(bench_cwise_math bench_cwise_math.cpp)
eigen_add_benchmark(bench_broadcasting bench_broadcasting.cpp)
eigen_add_benchmark(bench_block_ops bench_block_ops.cpp)
eigen_add_benchmark(bench_map bench_map.cpp)
eigen_add_benchmark(bench_diagonal bench_diagonal.cpp)
eigen_add_benchmark(bench_triangular_product bench_triangular_product.cpp)
eigen_add_benchmark(bench_selfadjoint_product bench_selfadjoint_product.cpp)
eigen_add_benchmark(bench_construction bench_construction.cpp)

View File

@@ -0,0 +1,89 @@
// Benchmarks for block extraction and assignment operations.
//
// Tests sub-matrix views: block(), topRows(), leftCols(), middleCols().
// Measures expression template overhead for read and write patterns.
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// Copy a centered square block of a larger matrix into its own storage,
// which forces the block expression to be evaluated.
template <typename Scalar>
static void BM_BlockRead(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index mat_size = state.range(0);
  const Index blk = state.range(1);
  const Index origin = (mat_size - blk) / 2;
  Mat source = Mat::Random(mat_size, mat_size);
  Mat copy(blk, blk);
  for (auto _ : state) {
    copy = source.block(origin, origin, blk, blk);
    benchmark::DoNotOptimize(copy.data());
  }
  state.SetBytesProcessed(state.iterations() * blk * blk * sizeof(Scalar));
}
// Store a small matrix into a centered block of a larger destination matrix.
template <typename Scalar>
static void BM_BlockWrite(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index mat_size = state.range(0);
  const Index blk = state.range(1);
  const Index origin = (mat_size - blk) / 2;
  Mat target = Mat::Random(mat_size, mat_size);
  const Mat payload = Mat::Random(blk, blk);
  for (auto _ : state) {
    target.block(origin, origin, blk, blk) = payload;
    benchmark::DoNotOptimize(target.data());
  }
  state.SetBytesProcessed(state.iterations() * blk * blk * sizeof(Scalar));
}
// Evaluate a topRows() view into a separate (k x n) matrix.
template <typename Scalar>
static void BM_TopRows(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index mat_size = state.range(0);
  const Index num_rows = state.range(1);
  Mat source = Mat::Random(mat_size, mat_size);
  Mat copy(num_rows, mat_size);
  for (auto _ : state) {
    copy = source.topRows(num_rows);
    benchmark::DoNotOptimize(copy.data());
  }
  state.SetBytesProcessed(state.iterations() * num_rows * mat_size * sizeof(Scalar));
}
// Evaluate a leftCols() view into a separate (n x k) matrix.
template <typename Scalar>
static void BM_LeftCols(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index mat_size = state.range(0);
  const Index num_cols = state.range(1);
  Mat source = Mat::Random(mat_size, mat_size);
  Mat copy(mat_size, num_cols);
  for (auto _ : state) {
    copy = source.leftCols(num_cols);
    benchmark::DoNotOptimize(copy.data());
  }
  state.SetBytesProcessed(state.iterations() * mat_size * num_cols * sizeof(Scalar));
}
// Argument sweep: pairs of (matrix_size, block_size); a block never
// exceeds the matrix it is taken from.
static void BlockSizes(::benchmark::Benchmark* b) {
  for (int mat : {256, 512, 1024}) {
    for (int blk : {16, 64, 128}) {
      if (blk <= mat) b->Args({mat, blk});
    }
  }
}
// Register every kernel for float and double over the (matrix, block) sweep.
BENCHMARK(BM_BlockRead<float>)->Apply(BlockSizes)->Name("BlockRead_float");
BENCHMARK(BM_BlockRead<double>)->Apply(BlockSizes)->Name("BlockRead_double");
BENCHMARK(BM_BlockWrite<float>)->Apply(BlockSizes)->Name("BlockWrite_float");
BENCHMARK(BM_BlockWrite<double>)->Apply(BlockSizes)->Name("BlockWrite_double");
BENCHMARK(BM_TopRows<float>)->Apply(BlockSizes)->Name("TopRows_float");
BENCHMARK(BM_TopRows<double>)->Apply(BlockSizes)->Name("TopRows_double");
BENCHMARK(BM_LeftCols<float>)->Apply(BlockSizes)->Name("LeftCols_float");
BENCHMARK(BM_LeftCols<double>)->Apply(BlockSizes)->Name("LeftCols_double");

View File

@@ -0,0 +1,192 @@
// Benchmarks for colwise/rowwise reductions and broadcasting operations.
//
// Tests vectorwise reductions (sum, mean, norm, minCoeff, maxCoeff) and
// broadcasting arithmetic (rowwise += vec, colwise -= vec, rowwise *= vec).
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// --- Colwise reductions (reduce each column to a scalar) ---
// Per-column sum of a (rows x cols) matrix into a row vector.
template <typename Scalar>
static void BM_ColwiseSum(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index nr = state.range(0);
  const Index nc = state.range(1);
  const Mat input = Mat::Random(nr, nc);
  Matrix<Scalar, 1, Dynamic> out(nc);
  for (auto _ : state) {
    out = input.colwise().sum();
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * nr * nc * sizeof(Scalar));
}
// Per-column mean of a (rows x cols) matrix into a row vector.
template <typename Scalar>
static void BM_ColwiseMean(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index nr = state.range(0);
  const Index nc = state.range(1);
  const Mat input = Mat::Random(nr, nc);
  Matrix<Scalar, 1, Dynamic> out(nc);
  for (auto _ : state) {
    out = input.colwise().mean();
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * nr * nc * sizeof(Scalar));
}
// Per-column Euclidean norm of a (rows x cols) matrix into a row vector.
template <typename Scalar>
static void BM_ColwiseNorm(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index nr = state.range(0);
  const Index nc = state.range(1);
  const Mat input = Mat::Random(nr, nc);
  Matrix<Scalar, 1, Dynamic> out(nc);
  for (auto _ : state) {
    out = input.colwise().norm();
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * nr * nc * sizeof(Scalar));
}
// Per-column minimum of a (rows x cols) matrix into a row vector.
template <typename Scalar>
static void BM_ColwiseMinCoeff(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index nr = state.range(0);
  const Index nc = state.range(1);
  const Mat input = Mat::Random(nr, nc);
  Matrix<Scalar, 1, Dynamic> out(nc);
  for (auto _ : state) {
    out = input.colwise().minCoeff();
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * nr * nc * sizeof(Scalar));
}
// Per-column maximum of a (rows x cols) matrix into a row vector.
template <typename Scalar>
static void BM_ColwiseMaxCoeff(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index nr = state.range(0);
  const Index nc = state.range(1);
  const Mat input = Mat::Random(nr, nc);
  Matrix<Scalar, 1, Dynamic> out(nc);
  for (auto _ : state) {
    out = input.colwise().maxCoeff();
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * nr * nc * sizeof(Scalar));
}
// --- Rowwise reductions (reduce each row to a scalar) ---
// Per-row sum of a (rows x cols) matrix into a column vector.
template <typename Scalar>
static void BM_RowwiseSum(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index nr = state.range(0);
  const Index nc = state.range(1);
  const Mat input = Mat::Random(nr, nc);
  Matrix<Scalar, Dynamic, 1> out(nr);
  for (auto _ : state) {
    out = input.rowwise().sum();
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * nr * nc * sizeof(Scalar));
}
// Per-row Euclidean norm of a (rows x cols) matrix into a column vector.
template <typename Scalar>
static void BM_RowwiseNorm(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index nr = state.range(0);
  const Index nc = state.range(1);
  const Mat input = Mat::Random(nr, nc);
  Matrix<Scalar, Dynamic, 1> out(nr);
  for (auto _ : state) {
    out = input.rowwise().norm();
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * nr * nc * sizeof(Scalar));
}
// --- Broadcasting operations ---
// Broadcast-add a row vector to every row of a matrix.
//
// The result is written into a separate destination. The previous version did
// `m.noalias() = m.rowwise() + v`, which (a) asserts no aliasing while the
// destination IS the source, and (b) mutates the input on every iteration so
// its values drift across the run. Writing to `out` keeps the per-iteration
// work identical (read matrix + vector, write matrix) without either problem.
template <typename Scalar>
static void BM_RowwiseBroadcastAdd(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, 1, Dynamic>;
  const Mat m = Mat::Random(rows, cols);
  const Vec v = Vec::Random(cols);
  Mat out(rows, cols);
  for (auto _ : state) {
    out = m.rowwise() + v;
    benchmark::DoNotOptimize(out.data());
  }
  // One full-matrix read plus one full-matrix write per iteration.
  state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2);
}
// Broadcast-add a column vector to every column of a matrix.
//
// The result is written into a separate destination. The previous version did
// `m.noalias() = m.colwise() + v`, which (a) asserts no aliasing while the
// destination IS the source, and (b) mutates the input on every iteration so
// its values drift across the run. Writing to `out` keeps the per-iteration
// work identical (read matrix + vector, write matrix) without either problem.
template <typename Scalar>
static void BM_ColwiseBroadcastAdd(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Mat m = Mat::Random(rows, cols);
  const Vec v = Vec::Random(rows);
  Mat out(rows, cols);
  for (auto _ : state) {
    out = m.colwise() + v;
    benchmark::DoNotOptimize(out.data());
  }
  // One full-matrix read plus one full-matrix write per iteration.
  state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2);
}
// Broadcast-multiply every row of a matrix by a row vector (array semantics).
//
// The result is written into a separate destination. The previous version did
// `m.array().rowwise() *= v` in place: since Random() factors lie in [-1, 1],
// every entry shrank geometrically each iteration and underflowed into
// subnormals/zero within a few hundred iterations — subnormal arithmetic can
// be drastically slower on common hardware, which distorts the measurement.
template <typename Scalar>
static void BM_RowwiseBroadcastMul(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Mat m = Mat::Random(rows, cols);
  const Array<Scalar, 1, Dynamic> v = Array<Scalar, 1, Dynamic>::Random(cols);
  Mat out(rows, cols);
  for (auto _ : state) {
    out.array() = m.array().rowwise() * v;
    benchmark::DoNotOptimize(out.data());
  }
  // One full-matrix read plus one full-matrix write per iteration.
  state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2);
}
// --- Size configurations ---
// Square shapes plus two extreme aspect ratios, to expose the cost
// difference between row-major-friendly and column-major-friendly traversal.
static void BroadcastSizes(::benchmark::Benchmark* b) {
  const int square_sizes[] = {64, 128, 256, 512, 1024};
  for (int n : square_sizes) b->Args({n, n});
  b->Args({10000, 32});  // tall and thin
  b->Args({32, 10000});  // short and wide
}
// Register every reduction/broadcast kernel over the shared size sweep.
// --- Register: float ---
BENCHMARK(BM_ColwiseSum<float>)->Apply(BroadcastSizes)->Name("ColwiseSum_float");
BENCHMARK(BM_ColwiseMean<float>)->Apply(BroadcastSizes)->Name("ColwiseMean_float");
BENCHMARK(BM_ColwiseNorm<float>)->Apply(BroadcastSizes)->Name("ColwiseNorm_float");
BENCHMARK(BM_ColwiseMinCoeff<float>)->Apply(BroadcastSizes)->Name("ColwiseMinCoeff_float");
BENCHMARK(BM_ColwiseMaxCoeff<float>)->Apply(BroadcastSizes)->Name("ColwiseMaxCoeff_float");
BENCHMARK(BM_RowwiseSum<float>)->Apply(BroadcastSizes)->Name("RowwiseSum_float");
BENCHMARK(BM_RowwiseNorm<float>)->Apply(BroadcastSizes)->Name("RowwiseNorm_float");
BENCHMARK(BM_RowwiseBroadcastAdd<float>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastAdd_float");
BENCHMARK(BM_ColwiseBroadcastAdd<float>)->Apply(BroadcastSizes)->Name("ColwiseBroadcastAdd_float");
BENCHMARK(BM_RowwiseBroadcastMul<float>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastMul_float");
// --- Register: double ---
BENCHMARK(BM_ColwiseSum<double>)->Apply(BroadcastSizes)->Name("ColwiseSum_double");
BENCHMARK(BM_ColwiseMean<double>)->Apply(BroadcastSizes)->Name("ColwiseMean_double");
BENCHMARK(BM_ColwiseNorm<double>)->Apply(BroadcastSizes)->Name("ColwiseNorm_double");
BENCHMARK(BM_ColwiseMinCoeff<double>)->Apply(BroadcastSizes)->Name("ColwiseMinCoeff_double");
BENCHMARK(BM_ColwiseMaxCoeff<double>)->Apply(BroadcastSizes)->Name("ColwiseMaxCoeff_double");
BENCHMARK(BM_RowwiseSum<double>)->Apply(BroadcastSizes)->Name("RowwiseSum_double");
BENCHMARK(BM_RowwiseNorm<double>)->Apply(BroadcastSizes)->Name("RowwiseNorm_double");
BENCHMARK(BM_RowwiseBroadcastAdd<double>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastAdd_double");
BENCHMARK(BM_ColwiseBroadcastAdd<double>)->Apply(BroadcastSizes)->Name("ColwiseBroadcastAdd_double");
BENCHMARK(BM_RowwiseBroadcastMul<double>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastMul_double");

View File

@@ -0,0 +1,138 @@
// Benchmarks for matrix initialization / construction.
//
// Tests setZero, setRandom, setIdentity, LinSpaced, Zero(), Constant()
// for both dynamic and small fixed-size matrices.
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// --- Dynamic-size construction ---
// Fill an n x n matrix with zeros (memset-like write bandwidth).
template <typename Scalar>
static void BM_SetZero(benchmark::State& state) {
  const Index dim = state.range(0);
  Matrix<Scalar, Dynamic, Dynamic> mat(dim, dim);
  for (auto _ : state) {
    mat.setZero();
    benchmark::DoNotOptimize(mat.data());
  }
  state.SetBytesProcessed(state.iterations() * dim * dim * sizeof(Scalar));
}
// Fill an n x n matrix with random values (RNG-bound, not bandwidth-bound).
template <typename Scalar>
static void BM_SetRandom(benchmark::State& state) {
  const Index dim = state.range(0);
  Matrix<Scalar, Dynamic, Dynamic> mat(dim, dim);
  for (auto _ : state) {
    mat.setRandom();
    benchmark::DoNotOptimize(mat.data());
  }
  state.SetBytesProcessed(state.iterations() * dim * dim * sizeof(Scalar));
}
// Overwrite an n x n matrix with the identity (zero fill plus diagonal pass).
template <typename Scalar>
static void BM_SetIdentity(benchmark::State& state) {
  const Index dim = state.range(0);
  Matrix<Scalar, Dynamic, Dynamic> mat(dim, dim);
  for (auto _ : state) {
    mat.setIdentity();
    benchmark::DoNotOptimize(mat.data());
  }
  state.SetBytesProcessed(state.iterations() * dim * dim * sizeof(Scalar));
}
// Fill an n x n matrix with a nonzero constant (broadcast-store bandwidth).
template <typename Scalar>
static void BM_SetConstant(benchmark::State& state) {
  const Index dim = state.range(0);
  Matrix<Scalar, Dynamic, Dynamic> mat(dim, dim);
  for (auto _ : state) {
    mat.setConstant(Scalar(42));
    benchmark::DoNotOptimize(mat.data());
  }
  state.SetBytesProcessed(state.iterations() * dim * dim * sizeof(Scalar));
}
// Generate n evenly spaced values in [0, 1] into a vector.
template <typename Scalar>
static void BM_LinSpaced(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  Vec out(len);
  for (auto _ : state) {
    out = Vec::LinSpaced(len, Scalar(0), Scalar(1));
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// --- Fixed-size construction ---
// Zero-fill an N x N fixed-size (stack-allocated) matrix; the compile-time
// size lets the whole fill unroll and vectorize.
template <typename Scalar, int N>
static void BM_FixedSetZero(benchmark::State& state) {
  Matrix<Scalar, N, N> mat;
  for (auto _ : state) {
    mat.setZero();
    benchmark::DoNotOptimize(mat.data());
  }
  state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar));
}
// Random-fill an N x N fixed-size matrix.
template <typename Scalar, int N>
static void BM_FixedSetRandom(benchmark::State& state) {
  Matrix<Scalar, N, N> mat;
  for (auto _ : state) {
    mat.setRandom();
    benchmark::DoNotOptimize(mat.data());
  }
  state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar));
}
// Write the identity into an N x N fixed-size matrix.
template <typename Scalar, int N>
static void BM_FixedSetIdentity(benchmark::State& state) {
  Matrix<Scalar, N, N> mat;
  for (auto _ : state) {
    mat.setIdentity();
    benchmark::DoNotOptimize(mat.data());
  }
  state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar));
}
// --- Size configurations ---
// Matrix dimensions: powers of two from 4 up to 1024.
static void DynamicSizes(::benchmark::Benchmark* b) {
  for (int n = 4; n <= 1024; n *= 2) b->Arg(n);
}
// Vector lengths: powers of four from 64 up to 64K elements.
static void VectorSizes(::benchmark::Benchmark* b) {
  for (int n = 64; n <= 65536; n *= 4) b->Arg(n);
}
// Register dynamic-size kernels over size sweeps and fixed-size kernels at
// the handful of tiny dimensions common in geometry code (2x2..8x8).
// --- Register: dynamic float ---
BENCHMARK(BM_SetZero<float>)->Apply(DynamicSizes)->Name("SetZero_float");
BENCHMARK(BM_SetRandom<float>)->Apply(DynamicSizes)->Name("SetRandom_float");
BENCHMARK(BM_SetIdentity<float>)->Apply(DynamicSizes)->Name("SetIdentity_float");
BENCHMARK(BM_SetConstant<float>)->Apply(DynamicSizes)->Name("SetConstant_float");
BENCHMARK(BM_LinSpaced<float>)->Apply(VectorSizes)->Name("LinSpaced_float");
// --- Register: dynamic double ---
BENCHMARK(BM_SetZero<double>)->Apply(DynamicSizes)->Name("SetZero_double");
BENCHMARK(BM_SetRandom<double>)->Apply(DynamicSizes)->Name("SetRandom_double");
BENCHMARK(BM_SetIdentity<double>)->Apply(DynamicSizes)->Name("SetIdentity_double");
BENCHMARK(BM_SetConstant<double>)->Apply(DynamicSizes)->Name("SetConstant_double");
BENCHMARK(BM_LinSpaced<double>)->Apply(VectorSizes)->Name("LinSpaced_double");
// --- Register: fixed-size float ---
BENCHMARK(BM_FixedSetZero<float, 2>)->Name("FixedSetZero_float_2x2");
BENCHMARK(BM_FixedSetZero<float, 3>)->Name("FixedSetZero_float_3x3");
BENCHMARK(BM_FixedSetZero<float, 4>)->Name("FixedSetZero_float_4x4");
BENCHMARK(BM_FixedSetZero<float, 8>)->Name("FixedSetZero_float_8x8");
BENCHMARK(BM_FixedSetRandom<float, 4>)->Name("FixedSetRandom_float_4x4");
BENCHMARK(BM_FixedSetIdentity<float, 4>)->Name("FixedSetIdentity_float_4x4");
// --- Register: fixed-size double ---
BENCHMARK(BM_FixedSetZero<double, 2>)->Name("FixedSetZero_double_2x2");
BENCHMARK(BM_FixedSetZero<double, 3>)->Name("FixedSetZero_double_3x3");
BENCHMARK(BM_FixedSetZero<double, 4>)->Name("FixedSetZero_double_4x4");
BENCHMARK(BM_FixedSetZero<double, 8>)->Name("FixedSetZero_double_8x8");
BENCHMARK(BM_FixedSetRandom<double, 4>)->Name("FixedSetRandom_double_4x4");
BENCHMARK(BM_FixedSetIdentity<double, 4>)->Name("FixedSetIdentity_double_4x4");

View File

@@ -0,0 +1,120 @@
// Benchmarks for vectorized coefficient-wise math functions.
//
// Each function is benchmarked on ArrayXf/ArrayXd with inputs chosen to
// stay in the valid domain and avoid NaN/Inf.
#include <benchmark/benchmark.h>
#include <Eigen/Core>
#include <unsupported/Eigen/SpecialFunctions>
using namespace Eigen;
// Macro to define a benchmark for a unary array operation.
// NAME: benchmark function suffix (e.g. Exp)
// EXPR: expression applied to the array (e.g. a.exp())
// LO, HI: input range [LO, HI] mapped from the default Random() range [-1,1]
// The expression is evaluated into a preallocated output `b`, so each
// iteration measures the math kernel plus one streaming read of `a` and one
// streaming write of `b` — which is exactly what SetBytesProcessed counts
// (n elements in + n elements out).
#define BENCH_CWISE_UNARY(NAME, EXPR, LO, HI) \
template <typename Scalar> \
static void BM_##NAME(benchmark::State& state) { \
const Index n = state.range(0); \
using Arr = Array<Scalar, Dynamic, 1>; \
/* Map Random [-1,1] to [LO, HI] */ \
Arr a = (Arr::Random(n) + Scalar(1)) * Scalar((double(HI) - double(LO)) / 2.0) + Scalar(LO); \
Arr b(n); \
for (auto _ : state) { \
b = EXPR; \
benchmark::DoNotOptimize(b.data()); \
} \
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar) * 2); \
}
// Each line instantiates BM_<Name> with an input range kept inside the
// function's real-valued domain (e.g. Log > 0, Asin within [-1, 1]).
// Transcendental functions
BENCH_CWISE_UNARY(Exp, a.exp(), -10, 10)
BENCH_CWISE_UNARY(Log, a.log(), 0.01, 100)
BENCH_CWISE_UNARY(Log1p, a.log1p(), -0.5, 100)
BENCH_CWISE_UNARY(Sqrt, a.sqrt(), 0, 100)
BENCH_CWISE_UNARY(Rsqrt, a.rsqrt(), 0.01, 100)
// Trigonometric functions
BENCH_CWISE_UNARY(Sin, a.sin(), -3.14, 3.14)
BENCH_CWISE_UNARY(Cos, a.cos(), -3.14, 3.14)
BENCH_CWISE_UNARY(Tan, a.tan(), -1.5, 1.5)
BENCH_CWISE_UNARY(Asin, a.asin(), -0.99, 0.99)
BENCH_CWISE_UNARY(Atan, a.atan(), -10, 10)
// Hyperbolic / special
BENCH_CWISE_UNARY(Tanh, a.tanh(), -5, 5)
BENCH_CWISE_UNARY(Erf, Eigen::erf(a), -4, 4)
// Simple operations (should be very fast / memory-bound)
BENCH_CWISE_UNARY(Abs, a.abs(), -100, 100)
BENCH_CWISE_UNARY(Square, a.square(), -100, 100)
BENCH_CWISE_UNARY(Cube, a.cube(), -10, 10)
BENCH_CWISE_UNARY(Ceil, a.ceil(), -100, 100)
BENCH_CWISE_UNARY(Floor, a.floor(), -100, 100)
BENCH_CWISE_UNARY(Round, a.round(), -100, 100)
// Sigmoid: 1 / (1 + exp(-x)), common in ML.
BENCH_CWISE_UNARY(Sigmoid, Scalar(1) / (Scalar(1) + (-a).exp()), -10, 10)
// Power: array^scalar with a non-integer exponent (general pow path).
template <typename Scalar>
static void BM_Pow(benchmark::State& state) {
  using Arr = Array<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  // Shift Random()'s [-1, 1] output into [0, 100] so pow stays real-valued.
  const Arr base = (Arr::Random(len) + Scalar(1)) * Scalar(50);
  Arr out(len);
  for (auto _ : state) {
    out = base.pow(Scalar(2.5));
    benchmark::DoNotOptimize(out.data());
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar) * 2);
}
// Element counts: powers of four from 1K up to 1M.
static void CwiseSizes(::benchmark::Benchmark* b) {
  for (int n = 1024; n <= 1048576; n *= 4) b->Arg(n);
}
// Register every coefficient-wise kernel for both scalar types.
// --- Register float ---
BENCHMARK(BM_Exp<float>)->Apply(CwiseSizes)->Name("Exp_float");
BENCHMARK(BM_Log<float>)->Apply(CwiseSizes)->Name("Log_float");
BENCHMARK(BM_Log1p<float>)->Apply(CwiseSizes)->Name("Log1p_float");
BENCHMARK(BM_Sqrt<float>)->Apply(CwiseSizes)->Name("Sqrt_float");
BENCHMARK(BM_Rsqrt<float>)->Apply(CwiseSizes)->Name("Rsqrt_float");
BENCHMARK(BM_Sin<float>)->Apply(CwiseSizes)->Name("Sin_float");
BENCHMARK(BM_Cos<float>)->Apply(CwiseSizes)->Name("Cos_float");
BENCHMARK(BM_Tan<float>)->Apply(CwiseSizes)->Name("Tan_float");
BENCHMARK(BM_Asin<float>)->Apply(CwiseSizes)->Name("Asin_float");
BENCHMARK(BM_Atan<float>)->Apply(CwiseSizes)->Name("Atan_float");
BENCHMARK(BM_Tanh<float>)->Apply(CwiseSizes)->Name("Tanh_float");
BENCHMARK(BM_Erf<float>)->Apply(CwiseSizes)->Name("Erf_float");
BENCHMARK(BM_Abs<float>)->Apply(CwiseSizes)->Name("Abs_float");
BENCHMARK(BM_Square<float>)->Apply(CwiseSizes)->Name("Square_float");
BENCHMARK(BM_Cube<float>)->Apply(CwiseSizes)->Name("Cube_float");
BENCHMARK(BM_Ceil<float>)->Apply(CwiseSizes)->Name("Ceil_float");
BENCHMARK(BM_Floor<float>)->Apply(CwiseSizes)->Name("Floor_float");
BENCHMARK(BM_Round<float>)->Apply(CwiseSizes)->Name("Round_float");
BENCHMARK(BM_Sigmoid<float>)->Apply(CwiseSizes)->Name("Sigmoid_float");
BENCHMARK(BM_Pow<float>)->Apply(CwiseSizes)->Name("Pow_float");
// --- Register double ---
BENCHMARK(BM_Exp<double>)->Apply(CwiseSizes)->Name("Exp_double");
BENCHMARK(BM_Log<double>)->Apply(CwiseSizes)->Name("Log_double");
BENCHMARK(BM_Log1p<double>)->Apply(CwiseSizes)->Name("Log1p_double");
BENCHMARK(BM_Sqrt<double>)->Apply(CwiseSizes)->Name("Sqrt_double");
BENCHMARK(BM_Rsqrt<double>)->Apply(CwiseSizes)->Name("Rsqrt_double");
BENCHMARK(BM_Sin<double>)->Apply(CwiseSizes)->Name("Sin_double");
BENCHMARK(BM_Cos<double>)->Apply(CwiseSizes)->Name("Cos_double");
BENCHMARK(BM_Tan<double>)->Apply(CwiseSizes)->Name("Tan_double");
BENCHMARK(BM_Asin<double>)->Apply(CwiseSizes)->Name("Asin_double");
BENCHMARK(BM_Atan<double>)->Apply(CwiseSizes)->Name("Atan_double");
BENCHMARK(BM_Tanh<double>)->Apply(CwiseSizes)->Name("Tanh_double");
BENCHMARK(BM_Erf<double>)->Apply(CwiseSizes)->Name("Erf_double");
BENCHMARK(BM_Abs<double>)->Apply(CwiseSizes)->Name("Abs_double");
BENCHMARK(BM_Square<double>)->Apply(CwiseSizes)->Name("Square_double");
BENCHMARK(BM_Cube<double>)->Apply(CwiseSizes)->Name("Cube_double");
BENCHMARK(BM_Ceil<double>)->Apply(CwiseSizes)->Name("Ceil_double");
BENCHMARK(BM_Floor<double>)->Apply(CwiseSizes)->Name("Floor_double");
BENCHMARK(BM_Round<double>)->Apply(CwiseSizes)->Name("Round_double");
BENCHMARK(BM_Sigmoid<double>)->Apply(CwiseSizes)->Name("Sigmoid_double");
BENCHMARK(BM_Pow<double>)->Apply(CwiseSizes)->Name("Pow_double");

View File

@@ -0,0 +1,81 @@
// Benchmarks for diagonal operations.
//
// Tests diagonal extraction, diagonal-matrix product, and matrix-diagonal product.
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// Extract the diagonal of a square matrix; summing it forces the
// strided reads to happen.
template <typename Scalar>
static void BM_DiagonalExtract(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  const Mat mat = Mat::Random(dim, dim);
  for (auto _ : state) {
    Scalar trace_like = mat.diagonal().sum();
    benchmark::DoNotOptimize(trace_like);
  }
  state.SetBytesProcessed(state.iterations() * dim * sizeof(Scalar));
}
// y = diag(d) * x — equivalent to a coefficient-wise product of two vectors.
template <typename Scalar>
static void BM_DiagonalTimesVector(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index dim = state.range(0);
  const Vec diag = Vec::Random(dim);
  const Vec rhs = Vec::Random(dim);
  Vec out(dim);
  for (auto _ : state) {
    out = diag.asDiagonal() * rhs;
    benchmark::DoNotOptimize(out.data());
  }
  // Two vector reads plus one vector write per iteration.
  state.SetBytesProcessed(state.iterations() * 3 * dim * sizeof(Scalar));
}
// C = diag(d) * A — scales each row of A by the matching diagonal entry.
template <typename Scalar>
static void BM_DiagonalTimesMatrix(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index dim = state.range(0);
  const Vec diag = Vec::Random(dim);
  const Mat rhs = Mat::Random(dim, dim);
  Mat out(dim, dim);
  for (auto _ : state) {
    out.noalias() = diag.asDiagonal() * rhs;
    benchmark::DoNotOptimize(out.data());
  }
  // One full-matrix read plus one full-matrix write per iteration.
  state.SetBytesProcessed(state.iterations() * 2 * dim * dim * sizeof(Scalar));
}
// C = A * diag(d) — scales each column of A by the matching diagonal entry.
template <typename Scalar>
static void BM_MatrixTimesDiagonal(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index dim = state.range(0);
  const Vec diag = Vec::Random(dim);
  const Mat lhs = Mat::Random(dim, dim);
  Mat out(dim, dim);
  for (auto _ : state) {
    out.noalias() = lhs * diag.asDiagonal();
    benchmark::DoNotOptimize(out.data());
  }
  // One full-matrix read plus one full-matrix write per iteration.
  state.SetBytesProcessed(state.iterations() * 2 * dim * dim * sizeof(Scalar));
}
// Matrix dimensions: powers of two from 32 up to 1024.
static void Sizes(::benchmark::Benchmark* b) {
  for (int n = 32; n <= 1024; n *= 2) b->Arg(n);
}
// Register every diagonal kernel for float and double.
BENCHMARK(BM_DiagonalExtract<float>)->Apply(Sizes)->Name("DiagonalExtract_float");
BENCHMARK(BM_DiagonalExtract<double>)->Apply(Sizes)->Name("DiagonalExtract_double");
BENCHMARK(BM_DiagonalTimesVector<float>)->Apply(Sizes)->Name("DiagonalTimesVector_float");
BENCHMARK(BM_DiagonalTimesVector<double>)->Apply(Sizes)->Name("DiagonalTimesVector_double");
BENCHMARK(BM_DiagonalTimesMatrix<float>)->Apply(Sizes)->Name("DiagonalTimesMatrix_float");
BENCHMARK(BM_DiagonalTimesMatrix<double>)->Apply(Sizes)->Name("DiagonalTimesMatrix_double");
BENCHMARK(BM_MatrixTimesDiagonal<float>)->Apply(Sizes)->Name("MatrixTimesDiagonal_float");
BENCHMARK(BM_MatrixTimesDiagonal<double>)->Apply(Sizes)->Name("MatrixTimesDiagonal_double");

View File

@@ -0,0 +1,51 @@
// Benchmarks for dot product (BLAS-1 critical path).
//
// Flop count: 2n for real, 8n for complex.
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// Flops for one length-n dot product: 2n for real scalars; 8n for complex
// (each complex multiply-add costs 4 real multiplies and 4 real adds).
template <typename Scalar>
double dotFlops(Index n) {
  constexpr double flops_per_element = NumTraits<Scalar>::IsComplex ? 8.0 : 2.0;
  return flops_per_element * n;
}
// Inner product of two length-n vectors; reports throughput as GFLOPS.
template <typename Scalar>
static void BM_Dot(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  const Vec lhs = Vec::Random(len);
  const Vec rhs = Vec::Random(len);
  for (auto _ : state) {
    Scalar result = lhs.dot(rhs);
    benchmark::DoNotOptimize(result);
  }
  state.counters["GFLOPS"] = benchmark::Counter(dotFlops<Scalar>(len), benchmark::Counter::kIsIterationInvariantRate,
                                                benchmark::Counter::kIs1000);
}
// Self dot product via squaredNorm(); same flop count as BM_Dot but only
// one input stream, so it stresses load bandwidth half as much.
template <typename Scalar>
static void BM_SquaredNorm(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  const Vec vec = Vec::Random(len);
  for (auto _ : state) {
    auto result = vec.squaredNorm();
    benchmark::DoNotOptimize(result);
  }
  state.counters["GFLOPS"] = benchmark::Counter(dotFlops<Scalar>(len), benchmark::Counter::kIsIterationInvariantRate,
                                                benchmark::Counter::kIs1000);
}
// Vector lengths: powers of four from 64 up to 1M elements.
static void DotSizes(::benchmark::Benchmark* b) {
  for (int n = 64; n <= 1048576; n *= 4) b->Arg(n);
}
// Dot is registered for real and complex scalars; squaredNorm only for real.
BENCHMARK(BM_Dot<float>)->Apply(DotSizes)->Name("Dot_float");
BENCHMARK(BM_Dot<double>)->Apply(DotSizes)->Name("Dot_double");
BENCHMARK(BM_Dot<std::complex<float>>)->Apply(DotSizes)->Name("Dot_cfloat");
BENCHMARK(BM_Dot<std::complex<double>>)->Apply(DotSizes)->Name("Dot_cdouble");
BENCHMARK(BM_SquaredNorm<float>)->Apply(DotSizes)->Name("SquaredNorm_float");
BENCHMARK(BM_SquaredNorm<double>)->Apply(DotSizes)->Name("SquaredNorm_double");

View File

@@ -0,0 +1,103 @@
// Benchmarks for Map and Ref with various strides.
//
// Compares contiguous Map vs strided Map vs owned matrix for basic
// operations (GEMV and vector sum).
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// Sum n elements viewed through a contiguous (unit-stride) Map.
template <typename Scalar>
static void BM_MapContiguousSum(benchmark::State& state) {
  const Index len = state.range(0);
  std::vector<Scalar> storage(len);
  Map<Matrix<Scalar, Dynamic, 1>> view(storage.data(), len);
  view.setRandom();
  for (auto _ : state) {
    Scalar total = view.sum();
    benchmark::DoNotOptimize(total);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Sum n elements read through a stride-3 inner-stride Map over a larger buffer.
template <typename Scalar>
static void BM_MapStridedSum(benchmark::State& state) {
  constexpr Index kStride = 3;
  const Index len = state.range(0);
  std::vector<Scalar> storage(len * kStride);
  Map<Matrix<Scalar, Dynamic, 1>, 0, InnerStride<>> view(storage.data(), len, InnerStride<>(kStride));
  view.setRandom();
  for (auto _ : state) {
    Scalar total = view.sum();
    benchmark::DoNotOptimize(total);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Baseline: sum an Eigen-owned vector (no Map indirection).
template <typename Scalar>
static void BM_OwnedSum(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  const Vec vec = Vec::Random(len);
  for (auto _ : state) {
    Scalar total = vec.sum();
    benchmark::DoNotOptimize(total);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// GEMV where the matrix storage is an external buffer viewed through Map.
template <typename Scalar>
static void BM_MapGemv(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index dim = state.range(0);
  std::vector<Scalar> storage(dim * dim);
  Map<Matrix<Scalar, Dynamic, Dynamic>> mapped(storage.data(), dim, dim);
  mapped.setRandom();
  const Vec rhs = Vec::Random(dim);
  Vec acc = Vec::Random(dim);
  for (auto _ : state) {
    acc.noalias() += mapped * rhs;
    benchmark::DoNotOptimize(acc.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * dim * dim, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// Baseline GEMV with an Eigen-owned matrix, for comparison against BM_MapGemv.
template <typename Scalar>
static void BM_OwnedGemv(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index dim = state.range(0);
  const Mat lhs = Mat::Random(dim, dim);
  const Vec rhs = Vec::Random(dim);
  Vec acc = Vec::Random(dim);
  for (auto _ : state) {
    acc.noalias() += lhs * rhs;
    benchmark::DoNotOptimize(acc.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * dim * dim, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// Vector lengths for the sum kernels: powers of four from 256 up to 1M.
static void SumSizes(::benchmark::Benchmark* b) {
  for (int n = 256; n <= 1048576; n *= 4) b->Arg(n);
}
// Matrix dimensions for the GEMV kernels.
static void GemvSizes(::benchmark::Benchmark* b) {
  const int dims[] = {32, 128, 512, 1024};
  for (int n : dims) b->Arg(n);
}
// Register Map/owned variants side by side so overhead is directly comparable.
BENCHMARK(BM_MapContiguousSum<float>)->Apply(SumSizes)->Name("MapContiguousSum_float");
BENCHMARK(BM_MapStridedSum<float>)->Apply(SumSizes)->Name("MapStridedSum_float");
BENCHMARK(BM_OwnedSum<float>)->Apply(SumSizes)->Name("OwnedSum_float");
BENCHMARK(BM_MapContiguousSum<double>)->Apply(SumSizes)->Name("MapContiguousSum_double");
BENCHMARK(BM_MapStridedSum<double>)->Apply(SumSizes)->Name("MapStridedSum_double");
BENCHMARK(BM_OwnedSum<double>)->Apply(SumSizes)->Name("OwnedSum_double");
BENCHMARK(BM_MapGemv<float>)->Apply(GemvSizes)->Name("MapGemv_float");
BENCHMARK(BM_OwnedGemv<float>)->Apply(GemvSizes)->Name("OwnedGemv_float");
BENCHMARK(BM_MapGemv<double>)->Apply(GemvSizes)->Name("MapGemv_double");
BENCHMARK(BM_OwnedGemv<double>)->Apply(GemvSizes)->Name("OwnedGemv_double");

View File

@@ -0,0 +1,173 @@
// Benchmarks for full reductions: sum, prod, minCoeff, maxCoeff, mean,
// norm, squaredNorm, lpNorm<1>, lpNorm<Infinity>.
//
// These are memory-bandwidth-bound for large vectors, so we report
// bytes processed rather than FLOPS.
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// --- Vector reductions (1-D) ---
// Full sum of a length-n vector.
template <typename Scalar>
static void BM_VectorSum(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  const Vec data = Vec::Random(len);
  for (auto _ : state) {
    Scalar total = data.sum();
    benchmark::DoNotOptimize(total);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Full product of a length-n vector.
template <typename Scalar>
static void BM_VectorProd(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  // Keep every factor within 1 +/- 0.001 so the running product neither
  // overflows nor collapses to zero for large n.
  Vec data = Vec::Constant(len, Scalar(1));
  data += Scalar(0.001) * Vec::Random(len);
  for (auto _ : state) {
    Scalar product = data.prod();
    benchmark::DoNotOptimize(product);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Minimum coefficient of a length-n vector.
template <typename Scalar>
static void BM_VectorMinCoeff(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  const Vec data = Vec::Random(len);
  for (auto _ : state) {
    Scalar lo = data.minCoeff();
    benchmark::DoNotOptimize(lo);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Maximum coefficient of a length-n vector.
template <typename Scalar>
static void BM_VectorMaxCoeff(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  const Vec data = Vec::Random(len);
  for (auto _ : state) {
    Scalar hi = data.maxCoeff();
    benchmark::DoNotOptimize(hi);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Arithmetic mean of a length-n vector (sum plus one divide).
template <typename Scalar>
static void BM_VectorMean(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  const Vec data = Vec::Random(len);
  for (auto _ : state) {
    Scalar avg = data.mean();
    benchmark::DoNotOptimize(avg);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Times Vector::squaredNorm() on a random vector of length state.range(0).
template <typename Scalar>
static void BM_VectorSquaredNorm(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  Vec input = Vec::Random(len);
  for (auto _ : state) {
    Scalar acc = input.squaredNorm();
    benchmark::DoNotOptimize(acc);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Times Vector::norm() (Euclidean norm) on a random vector of length
// state.range(0).
template <typename Scalar>
static void BM_VectorNorm(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  Vec input = Vec::Random(len);
  for (auto _ : state) {
    Scalar acc = input.norm();
    benchmark::DoNotOptimize(acc);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Times Vector::lpNorm<1>() on a random vector of length state.range(0).
template <typename Scalar>
static void BM_VectorLpNorm1(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  Vec input = Vec::Random(len);
  for (auto _ : state) {
    // 'template' keyword required: the call is on a Scalar-dependent type.
    Scalar acc = input.template lpNorm<1>();
    benchmark::DoNotOptimize(acc);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// Times Vector::lpNorm<Infinity>() on a random vector of length state.range(0).
template <typename Scalar>
static void BM_VectorLpNormInf(benchmark::State& state) {
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index len = state.range(0);
  Vec input = Vec::Random(len);
  for (auto _ : state) {
    Scalar acc = input.template lpNorm<Infinity>();
    benchmark::DoNotOptimize(acc);
  }
  state.SetBytesProcessed(state.iterations() * len * sizeof(Scalar));
}
// --- Matrix reductions (2-D) ---
// Times Matrix::sum() on a random n-by-n matrix, n = state.range(0).
template <typename Scalar>
static void BM_MatrixSum(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat input = Mat::Random(dim, dim);
  for (auto _ : state) {
    Scalar acc = input.sum();
    benchmark::DoNotOptimize(acc);
  }
  state.SetBytesProcessed(state.iterations() * dim * dim * sizeof(Scalar));
}
// Times Matrix::norm() (Frobenius norm) on a random n-by-n matrix.
template <typename Scalar>
static void BM_MatrixNorm(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat input = Mat::Random(dim, dim);
  for (auto _ : state) {
    Scalar acc = input.norm();
    benchmark::DoNotOptimize(acc);
  }
  state.SetBytesProcessed(state.iterations() * dim * dim * sizeof(Scalar));
}
// --- Size configurations ---
// Vector lengths from small (likely cache-resident) up to 2^20 elements.
static void VectorSizes(::benchmark::Benchmark* b) {
  for (int len : {64, 256, 1024, 4096, 16384, 65536, 262144, 1048576}) {
    b->Arg(len);
  }
}
// Square matrix dimensions for the 2-D reductions.
static void MatrixSizes(::benchmark::Benchmark* b) {
  for (int dim : {8, 32, 64, 128, 256, 512, 1024}) {
    b->Arg(dim);
  }
}
// --- Register: float ---
// Each template instantiation is registered under an explicit display name
// via ->Name(), and sized with the Apply() helpers above.
BENCHMARK(BM_VectorSum<float>)->Apply(VectorSizes)->Name("VectorSum_float");
BENCHMARK(BM_VectorProd<float>)->Apply(VectorSizes)->Name("VectorProd_float");
BENCHMARK(BM_VectorMinCoeff<float>)->Apply(VectorSizes)->Name("VectorMinCoeff_float");
BENCHMARK(BM_VectorMaxCoeff<float>)->Apply(VectorSizes)->Name("VectorMaxCoeff_float");
BENCHMARK(BM_VectorMean<float>)->Apply(VectorSizes)->Name("VectorMean_float");
BENCHMARK(BM_VectorSquaredNorm<float>)->Apply(VectorSizes)->Name("VectorSquaredNorm_float");
BENCHMARK(BM_VectorNorm<float>)->Apply(VectorSizes)->Name("VectorNorm_float");
BENCHMARK(BM_VectorLpNorm1<float>)->Apply(VectorSizes)->Name("VectorLpNorm1_float");
BENCHMARK(BM_VectorLpNormInf<float>)->Apply(VectorSizes)->Name("VectorLpNormInf_float");
BENCHMARK(BM_MatrixSum<float>)->Apply(MatrixSizes)->Name("MatrixSum_float");
BENCHMARK(BM_MatrixNorm<float>)->Apply(MatrixSizes)->Name("MatrixNorm_float");
// --- Register: double ---
BENCHMARK(BM_VectorSum<double>)->Apply(VectorSizes)->Name("VectorSum_double");
BENCHMARK(BM_VectorProd<double>)->Apply(VectorSizes)->Name("VectorProd_double");
BENCHMARK(BM_VectorMinCoeff<double>)->Apply(VectorSizes)->Name("VectorMinCoeff_double");
BENCHMARK(BM_VectorMaxCoeff<double>)->Apply(VectorSizes)->Name("VectorMaxCoeff_double");
BENCHMARK(BM_VectorMean<double>)->Apply(VectorSizes)->Name("VectorMean_double");
BENCHMARK(BM_VectorSquaredNorm<double>)->Apply(VectorSizes)->Name("VectorSquaredNorm_double");
BENCHMARK(BM_VectorNorm<double>)->Apply(VectorSizes)->Name("VectorNorm_double");
BENCHMARK(BM_VectorLpNorm1<double>)->Apply(VectorSizes)->Name("VectorLpNorm1_double");
BENCHMARK(BM_VectorLpNormInf<double>)->Apply(VectorSizes)->Name("VectorLpNormInf_double");
BENCHMARK(BM_MatrixSum<double>)->Apply(MatrixSizes)->Name("MatrixSum_double");
BENCHMARK(BM_MatrixNorm<double>)->Apply(MatrixSizes)->Name("MatrixNorm_double");

View File

@@ -0,0 +1,78 @@
// Benchmarks for self-adjoint (symmetric/hermitian) matrix operations.
//
// Tests SYMM (selfadjointView * dense) and rank-k updates.
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// SYMM with the symmetric operand on the left: C = sym(A) * B.
// A is symmetrized first; the product reads only its Lower triangle.
template <typename Scalar>
static void BM_SYMM_Left(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  A = (A + A.transpose()).eval() / Scalar(2);  // eval() avoids transpose aliasing
  Mat B = Mat::Random(dim, dim);
  Mat C(dim, dim);
  for (auto _ : state) {
    C.noalias() = A.template selfadjointView<Lower>() * B;
    benchmark::DoNotOptimize(C.data());
  }
  // 2*n^3 flops per product, same as a full GEMM.
  state.counters["GFLOPS"] = benchmark::Counter(
      2.0 * dim * dim * dim, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// SYMM with the symmetric operand on the right: C = B * sym(A).
template <typename Scalar>
static void BM_SYMM_Right(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  A = (A + A.transpose()).eval() / Scalar(2);  // eval() avoids transpose aliasing
  Mat B = Mat::Random(dim, dim);
  Mat C(dim, dim);
  for (auto _ : state) {
    C.noalias() = B * A.template selfadjointView<Lower>();
    benchmark::DoNotOptimize(C.data());
  }
  // 2*n^3 flops per product, same as a full GEMM.
  state.counters["GFLOPS"] = benchmark::Counter(
      2.0 * dim * dim * dim, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// Rank-k update of the Lower triangle: C += A * A^T with A of size n-by-k.
// Flop estimate is n^2 * k since only one triangle is written.
template <typename Scalar>
static void BM_RankUpdate(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  const Index rank = state.range(1);
  Mat A = Mat::Random(dim, rank);
  Mat C = Mat::Zero(dim, dim);
  for (auto _ : state) {
    // C keeps accumulating across iterations (no reset inside the timed loop).
    C.template selfadjointView<Lower>().rankUpdate(A);
    benchmark::DoNotOptimize(C.data());
  }
  state.counters["GFLOPS"] = benchmark::Counter(
      1.0 * dim * dim * rank, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// Square problem sizes for the SYMM benchmarks.
static void SymmSizes(::benchmark::Benchmark* b) {
  for (int dim : {64, 128, 256, 512, 1024}) {
    b->Arg(dim);
  }
}
// (n, k) pairs for the rank-k update benchmarks.
static void RankUpdateSizes(::benchmark::Benchmark* b) {
  for (int dim : {64, 128, 256, 512})
    for (int rank : {16, 64, 256}) b->Args({dim, rank});
}
// Register each scalar type under an explicit display name.
BENCHMARK(BM_SYMM_Left<float>)->Apply(SymmSizes)->Name("SYMM_Left_float");
BENCHMARK(BM_SYMM_Left<double>)->Apply(SymmSizes)->Name("SYMM_Left_double");
BENCHMARK(BM_SYMM_Right<float>)->Apply(SymmSizes)->Name("SYMM_Right_float");
BENCHMARK(BM_SYMM_Right<double>)->Apply(SymmSizes)->Name("SYMM_Right_double");
BENCHMARK(BM_RankUpdate<float>)->Apply(RankUpdateSizes)->Name("RankUpdate_float");
BENCHMARK(BM_RankUpdate<double>)->Apply(RankUpdateSizes)->Name("RankUpdate_double");

View File

@@ -0,0 +1,56 @@
// Benchmarks for triangular-dense matrix products (TRMM).
//
// Tests C = triangular(A) * B for various modes (Lower/Upper) and sides (Left/Right).
#include <benchmark/benchmark.h>
#include <Eigen/Core>
using namespace Eigen;
// TRMM with the triangular factor on the left: C = tri<Mode>(A) * B.
// Roughly n^3 flops -- half of a full GEMM, since only one triangle of A
// participates.
template <typename Scalar, unsigned int Mode>
static void BM_TRMM_Left(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  Mat B = Mat::Random(dim, dim);
  Mat C(dim, dim);
  for (auto _ : state) {
    C.noalias() = A.template triangularView<Mode>() * B;
    benchmark::DoNotOptimize(C.data());
  }
  state.counters["GFLOPS"] = benchmark::Counter(
      1.0 * dim * dim * dim, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// TRMM with the triangular factor on the right: C = B * tri<Mode>(A).
template <typename Scalar, unsigned int Mode>
static void BM_TRMM_Right(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  Mat B = Mat::Random(dim, dim);
  Mat C(dim, dim);
  for (auto _ : state) {
    C.noalias() = B * A.template triangularView<Mode>();
    benchmark::DoNotOptimize(C.data());
  }
  // ~n^3 flops: half of a full GEMM.
  state.counters["GFLOPS"] = benchmark::Counter(
      1.0 * dim * dim * dim, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
// Square problem sizes for the TRMM benchmarks.
static void TrmmSizes(::benchmark::Benchmark* b) {
  for (int dim : {64, 128, 256, 512, 1024}) {
    b->Arg(dim);
  }
}
// Left product
// One registration per (scalar, triangular mode) combination.
BENCHMARK(BM_TRMM_Left<float, Lower>)->Apply(TrmmSizes)->Name("TRMM_Left_float_Lower");
BENCHMARK(BM_TRMM_Left<float, Upper>)->Apply(TrmmSizes)->Name("TRMM_Left_float_Upper");
BENCHMARK(BM_TRMM_Left<double, Lower>)->Apply(TrmmSizes)->Name("TRMM_Left_double_Lower");
BENCHMARK(BM_TRMM_Left<double, Upper>)->Apply(TrmmSizes)->Name("TRMM_Left_double_Upper");
// Right product
BENCHMARK(BM_TRMM_Right<float, Lower>)->Apply(TrmmSizes)->Name("TRMM_Right_float_Lower");
BENCHMARK(BM_TRMM_Right<float, Upper>)->Apply(TrmmSizes)->Name("TRMM_Right_float_Upper");
BENCHMARK(BM_TRMM_Right<double, Lower>)->Apply(TrmmSizes)->Name("TRMM_Right_double_Lower");
BENCHMARK(BM_TRMM_Right<double, Upper>)->Apply(TrmmSizes)->Name("TRMM_Right_double_Upper");

View File

@@ -1 +1 @@
# LU benchmarks will be added here.
eigen_add_benchmark(bench_lu bench_lu.cpp)

139
benchmarks/LU/bench_lu.cpp Normal file
View File

@@ -0,0 +1,139 @@
// Benchmarks for LU decompositions.
//
// Tests PartialPivLU and FullPivLU: compute, solve, inverse, determinant.
#include <benchmark/benchmark.h>
#include <Eigen/LU>
using namespace Eigen;
typedef Matrix<float, Dynamic, Dynamic> Matf;
typedef Matrix<double, Dynamic, Dynamic> Matd;
// --- PartialPivLU ---
// Out-of-line (EIGEN_DONT_INLINE) wrapper so the factorization inside the
// timed loop remains an actual call the optimizer cannot fold away.
template <typename Scalar>
EIGEN_DONT_INLINE void do_compute(PartialPivLU<Matrix<Scalar, Dynamic, Dynamic>>& dec,
                                  const Matrix<Scalar, Dynamic, Dynamic>& m) {
  dec.compute(m);
}
// Times PartialPivLU factorization of a random n-by-n matrix.
template <typename Scalar>
static void BM_PartialPivLU_Compute(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  PartialPivLU<Mat> lu(dim);  // preallocated so the loop times compute() only
  for (auto _ : state) {
    do_compute(lu, A);
    benchmark::DoNotOptimize(lu.matrixLU().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// Times solving A * X = B from a precomputed PartialPivLU; the factorization
// itself happens outside the timed loop.
template <typename Scalar>
static void BM_PartialPivLU_Solve(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  const Index num_rhs = state.range(1);
  Mat A = Mat::Random(dim, dim);
  Mat B = Mat::Random(dim, num_rhs);
  PartialPivLU<Mat> lu(A);
  Mat X(dim, num_rhs);
  for (auto _ : state) {
    X = lu.solve(B);
    benchmark::DoNotOptimize(X.data());
  }
  state.SetItemsProcessed(state.iterations());
}
// Times forming the explicit inverse from a precomputed PartialPivLU.
template <typename Scalar>
static void BM_PartialPivLU_Inverse(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  PartialPivLU<Mat> lu(A);
  Mat inverse(dim, dim);
  for (auto _ : state) {
    inverse = lu.inverse();
    benchmark::DoNotOptimize(inverse.data());
  }
  state.SetItemsProcessed(state.iterations());
}
// Times determinant() on a precomputed PartialPivLU.
template <typename Scalar>
static void BM_PartialPivLU_Determinant(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  PartialPivLU<Mat> lu(A);
  for (auto _ : state) {
    Scalar det = lu.determinant();
    benchmark::DoNotOptimize(det);
  }
  state.SetItemsProcessed(state.iterations());
}
// --- FullPivLU ---
// FullPivLU overload of the out-of-line compute wrapper (see above).
template <typename Scalar>
EIGEN_DONT_INLINE void do_compute(FullPivLU<Matrix<Scalar, Dynamic, Dynamic>>& dec,
                                  const Matrix<Scalar, Dynamic, Dynamic>& m) {
  dec.compute(m);
}
// Times FullPivLU factorization (complete pivoting) of a random n-by-n matrix.
template <typename Scalar>
static void BM_FullPivLU_Compute(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  Mat A = Mat::Random(dim, dim);
  FullPivLU<Mat> lu(dim, dim);  // preallocated so the loop times compute() only
  for (auto _ : state) {
    do_compute(lu, A);
    benchmark::DoNotOptimize(lu.matrixLU().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// Times solving A * X = B from a precomputed FullPivLU.
template <typename Scalar>
static void BM_FullPivLU_Solve(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index dim = state.range(0);
  const Index num_rhs = state.range(1);
  Mat A = Mat::Random(dim, dim);
  Mat B = Mat::Random(dim, num_rhs);
  FullPivLU<Mat> lu(A);  // factorization outside the timed loop
  Mat X(dim, num_rhs);
  for (auto _ : state) {
    X = lu.solve(B);
    benchmark::DoNotOptimize(X.data());
  }
  state.SetItemsProcessed(state.iterations());
}
// --- Size configurations ---
// Square problem sizes for the factorization benchmarks.
static void SquareSizes(::benchmark::Benchmark* b) {
  for (int dim : {8, 32, 64, 128, 256, 512, 1024}) {
    b->Arg(dim);
  }
}
// (n, nrhs) pairs for the solve benchmarks.
static void SolveSizes(::benchmark::Benchmark* b) {
  for (int dim : {32, 128, 512, 1024})
    for (int num_rhs : {1, 16, 64}) b->Args({dim, num_rhs});
}
// Register each scalar type under an explicit display name.
BENCHMARK(BM_PartialPivLU_Compute<float>)->Apply(SquareSizes)->Name("PartialPivLU_Compute_float");
BENCHMARK(BM_PartialPivLU_Compute<double>)->Apply(SquareSizes)->Name("PartialPivLU_Compute_double");
BENCHMARK(BM_PartialPivLU_Solve<float>)->Apply(SolveSizes)->Name("PartialPivLU_Solve_float");
BENCHMARK(BM_PartialPivLU_Solve<double>)->Apply(SolveSizes)->Name("PartialPivLU_Solve_double");
BENCHMARK(BM_PartialPivLU_Inverse<float>)->Apply(SquareSizes)->Name("PartialPivLU_Inverse_float");
BENCHMARK(BM_PartialPivLU_Inverse<double>)->Apply(SquareSizes)->Name("PartialPivLU_Inverse_double");
BENCHMARK(BM_PartialPivLU_Determinant<float>)->Apply(SquareSizes)->Name("PartialPivLU_Determinant_float");
BENCHMARK(BM_PartialPivLU_Determinant<double>)->Apply(SquareSizes)->Name("PartialPivLU_Determinant_double");
BENCHMARK(BM_FullPivLU_Compute<float>)->Apply(SquareSizes)->Name("FullPivLU_Compute_float");
BENCHMARK(BM_FullPivLU_Compute<double>)->Apply(SquareSizes)->Name("FullPivLU_Compute_double");
BENCHMARK(BM_FullPivLU_Solve<float>)->Apply(SolveSizes)->Name("FullPivLU_Solve_float");
BENCHMARK(BM_FullPivLU_Solve<double>)->Apply(SolveSizes)->Name("FullPivLU_Solve_double");

View File

@@ -1 +1 @@
# QR benchmarks will be added here.
eigen_add_benchmark(bench_qr bench_qr.cpp)

123
benchmarks/QR/bench_qr.cpp Normal file
View File

@@ -0,0 +1,123 @@
// Benchmarks for QR decompositions.
//
// Tests HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR, and COD.
// Both square and tall-thin matrix shapes are tested.
#include <benchmark/benchmark.h>
#include <Eigen/QR>
using namespace Eigen;
// Out-of-line (EIGEN_DONT_INLINE) wrapper so the factorization inside the
// timed loop remains an actual call the optimizer cannot fold away.
template <typename QR>
EIGEN_DONT_INLINE void do_compute(QR& dec, const typename QR::MatrixType& m) {
  dec.compute(m);
}
// --- HouseholderQR ---
// Times the unpivoted Householder QR factorization of a rows-by-cols matrix.
template <typename Scalar>
static void BM_HouseholderQR(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index num_rows = state.range(0);
  const Index num_cols = state.range(1);
  Mat A = Mat::Random(num_rows, num_cols);
  HouseholderQR<Mat> qr(num_rows, num_cols);  // preallocated workspace
  for (auto _ : state) {
    do_compute(qr, A);
    benchmark::DoNotOptimize(qr.matrixQR().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// --- ColPivHouseholderQR ---
// Times the column-pivoted Householder QR factorization.
template <typename Scalar>
static void BM_ColPivHouseholderQR(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index num_rows = state.range(0);
  const Index num_cols = state.range(1);
  Mat A = Mat::Random(num_rows, num_cols);
  ColPivHouseholderQR<Mat> qr(num_rows, num_cols);  // preallocated workspace
  for (auto _ : state) {
    do_compute(qr, A);
    benchmark::DoNotOptimize(qr.matrixQR().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// --- FullPivHouseholderQR ---
// Times the fully pivoted Householder QR factorization.
template <typename Scalar>
static void BM_FullPivHouseholderQR(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index num_rows = state.range(0);
  const Index num_cols = state.range(1);
  Mat A = Mat::Random(num_rows, num_cols);
  FullPivHouseholderQR<Mat> qr(num_rows, num_cols);  // preallocated workspace
  for (auto _ : state) {
    do_compute(qr, A);
    benchmark::DoNotOptimize(qr.matrixQR().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// --- CompleteOrthogonalDecomposition (COD) ---
// Times the complete orthogonal decomposition of a rows-by-cols matrix.
template <typename Scalar>
static void BM_COD(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  const Index num_rows = state.range(0);
  const Index num_cols = state.range(1);
  Mat A = Mat::Random(num_rows, num_cols);
  CompleteOrthogonalDecomposition<Mat> cod(num_rows, num_cols);  // preallocated
  for (auto _ : state) {
    do_compute(cod, A);
    benchmark::DoNotOptimize(cod.matrixQTZ().data());
  }
  state.SetItemsProcessed(state.iterations());
}
// --- QR solve ---
// Times HouseholderQR::solve() with a precomputed factorization; for
// non-square shapes this is a least-squares solve.
template <typename Scalar>
static void BM_HouseholderQR_Solve(benchmark::State& state) {
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  const Index num_rows = state.range(0);
  const Index num_cols = state.range(1);
  Mat A = Mat::Random(num_rows, num_cols);
  Vec b = Vec::Random(num_rows);
  HouseholderQR<Mat> qr(A);  // factorization outside the timed loop
  Vec x(num_cols);
  for (auto _ : state) {
    x = qr.solve(b);
    benchmark::DoNotOptimize(x.data());
  }
  state.SetItemsProcessed(state.iterations());
}
// --- Size configurations ---
// Each benchmark takes (rows, cols) as its two range arguments.
static void QrSizes(::benchmark::Benchmark* b) {
  // Square shapes.
  for (int dim : {32, 64, 128, 256, 512, 1024}) {
    b->Args({dim, dim});
  }
  // Tall-and-skinny (least-squares style) shapes.
  for (int rows : {1000, 10000}) {
    for (int cols : {32, 100}) {
      b->Args({rows, cols});
    }
  }
}
// Register: float
// Each instantiation gets an explicit display name via ->Name().
BENCHMARK(BM_HouseholderQR<float>)->Apply(QrSizes)->Name("HouseholderQR_float");
BENCHMARK(BM_ColPivHouseholderQR<float>)->Apply(QrSizes)->Name("ColPivHouseholderQR_float");
BENCHMARK(BM_FullPivHouseholderQR<float>)->Apply(QrSizes)->Name("FullPivHouseholderQR_float");
BENCHMARK(BM_COD<float>)->Apply(QrSizes)->Name("COD_float");
BENCHMARK(BM_HouseholderQR_Solve<float>)->Apply(QrSizes)->Name("HouseholderQR_Solve_float");
// Register: double
BENCHMARK(BM_HouseholderQR<double>)->Apply(QrSizes)->Name("HouseholderQR_double");
BENCHMARK(BM_ColPivHouseholderQR<double>)->Apply(QrSizes)->Name("ColPivHouseholderQR_double");
BENCHMARK(BM_FullPivHouseholderQR<double>)->Apply(QrSizes)->Name("FullPivHouseholderQR_double");
BENCHMARK(BM_COD<double>)->Apply(QrSizes)->Name("COD_double");
BENCHMARK(BM_HouseholderQR_Solve<double>)->Apply(QrSizes)->Name("HouseholderQR_Solve_double");