From 1f49bf96cf87158f1cc0518e656061383635db9e Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com> Date: Sun, 22 Feb 2026 12:19:37 -0800 Subject: [PATCH] Add new benchmarks for Core, LU, and QR operations libeigen/eigen!2177 Closes #3035 Co-authored-by: Rasmus Munk Larsen --- benchmarks/Core/CMakeLists.txt | 10 + benchmarks/Core/bench_block_ops.cpp | 89 ++++++++ benchmarks/Core/bench_broadcasting.cpp | 192 ++++++++++++++++++ benchmarks/Core/bench_construction.cpp | 138 +++++++++++++ benchmarks/Core/bench_cwise_math.cpp | 120 +++++++++++ benchmarks/Core/bench_diagonal.cpp | 81 ++++++++ benchmarks/Core/bench_dot.cpp | 51 +++++ benchmarks/Core/bench_map.cpp | 103 ++++++++++ benchmarks/Core/bench_reductions.cpp | 173 ++++++++++++++++ benchmarks/Core/bench_selfadjoint_product.cpp | 78 +++++++ benchmarks/Core/bench_triangular_product.cpp | 56 +++++ benchmarks/LU/CMakeLists.txt | 2 +- benchmarks/LU/bench_lu.cpp | 139 +++++++++++++ benchmarks/QR/CMakeLists.txt | 2 +- benchmarks/QR/bench_qr.cpp | 123 +++++++++++ 15 files changed, 1355 insertions(+), 2 deletions(-) create mode 100644 benchmarks/Core/bench_block_ops.cpp create mode 100644 benchmarks/Core/bench_broadcasting.cpp create mode 100644 benchmarks/Core/bench_construction.cpp create mode 100644 benchmarks/Core/bench_cwise_math.cpp create mode 100644 benchmarks/Core/bench_diagonal.cpp create mode 100644 benchmarks/Core/bench_dot.cpp create mode 100644 benchmarks/Core/bench_map.cpp create mode 100644 benchmarks/Core/bench_reductions.cpp create mode 100644 benchmarks/Core/bench_selfadjoint_product.cpp create mode 100644 benchmarks/Core/bench_triangular_product.cpp create mode 100644 benchmarks/LU/bench_lu.cpp create mode 100644 benchmarks/QR/bench_qr.cpp diff --git a/benchmarks/Core/CMakeLists.txt b/benchmarks/Core/CMakeLists.txt index 9dbb4fad9..7cc62d1d7 100644 --- a/benchmarks/Core/CMakeLists.txt +++ b/benchmarks/Core/CMakeLists.txt @@ -5,3 +5,13 @@ 
eigen_add_benchmark(bench_vecadd bench_vecadd.cpp) eigen_add_benchmark(bench_trsm bench_trsm.cpp) eigen_add_benchmark(bench_reverse bench_reverse.cpp) eigen_add_benchmark(bench_move_semantics bench_move_semantics.cpp) +eigen_add_benchmark(bench_reductions bench_reductions.cpp) +eigen_add_benchmark(bench_dot bench_dot.cpp) +eigen_add_benchmark(bench_cwise_math bench_cwise_math.cpp) +eigen_add_benchmark(bench_broadcasting bench_broadcasting.cpp) +eigen_add_benchmark(bench_block_ops bench_block_ops.cpp) +eigen_add_benchmark(bench_map bench_map.cpp) +eigen_add_benchmark(bench_diagonal bench_diagonal.cpp) +eigen_add_benchmark(bench_triangular_product bench_triangular_product.cpp) +eigen_add_benchmark(bench_selfadjoint_product bench_selfadjoint_product.cpp) +eigen_add_benchmark(bench_construction bench_construction.cpp) diff --git a/benchmarks/Core/bench_block_ops.cpp b/benchmarks/Core/bench_block_ops.cpp new file mode 100644 index 000000000..5d13f8904 --- /dev/null +++ b/benchmarks/Core/bench_block_ops.cpp @@ -0,0 +1,89 @@ +// Benchmarks for block extraction and assignment operations. +// +// Tests sub-matrix views: block(), topRows(), leftCols(), middleCols(). +// Measures expression template overhead for read and write patterns. + +#include +#include + +using namespace Eigen; + +// Read a block and assign to a separate matrix (forces evaluation). +template +static void BM_BlockRead(benchmark::State& state) { + const Index n = state.range(0); + const Index block_size = state.range(1); + using Mat = Matrix; + Mat src = Mat::Random(n, n); + Mat dst(block_size, block_size); + const Index off = (n - block_size) / 2; + for (auto _ : state) { + dst = src.block(off, off, block_size, block_size); + benchmark::DoNotOptimize(dst.data()); + } + state.SetBytesProcessed(state.iterations() * block_size * block_size * sizeof(Scalar)); +} + +// Write into a block of a larger matrix. 
+template +static void BM_BlockWrite(benchmark::State& state) { + const Index n = state.range(0); + const Index block_size = state.range(1); + using Mat = Matrix; + Mat dst = Mat::Random(n, n); + Mat src = Mat::Random(block_size, block_size); + const Index off = (n - block_size) / 2; + for (auto _ : state) { + dst.block(off, off, block_size, block_size) = src; + benchmark::DoNotOptimize(dst.data()); + } + state.SetBytesProcessed(state.iterations() * block_size * block_size * sizeof(Scalar)); +} + +// topRows extraction. +template +static void BM_TopRows(benchmark::State& state) { + const Index n = state.range(0); + const Index k = state.range(1); + using Mat = Matrix; + Mat src = Mat::Random(n, n); + Mat dst(k, n); + for (auto _ : state) { + dst = src.topRows(k); + benchmark::DoNotOptimize(dst.data()); + } + state.SetBytesProcessed(state.iterations() * k * n * sizeof(Scalar)); +} + +// leftCols extraction. +template +static void BM_LeftCols(benchmark::State& state) { + const Index n = state.range(0); + const Index k = state.range(1); + using Mat = Matrix; + Mat src = Mat::Random(n, n); + Mat dst(n, k); + for (auto _ : state) { + dst = src.leftCols(k); + benchmark::DoNotOptimize(dst.data()); + } + state.SetBytesProcessed(state.iterations() * n * k * sizeof(Scalar)); +} + +static void BlockSizes(::benchmark::Benchmark* b) { + // (matrix_size, block_size) + for (int n : {256, 512, 1024}) { + for (int bs : {16, 64, 128}) { + if (bs <= n) b->Args({n, bs}); + } + } +} + +BENCHMARK(BM_BlockRead)->Apply(BlockSizes)->Name("BlockRead_float"); +BENCHMARK(BM_BlockRead)->Apply(BlockSizes)->Name("BlockRead_double"); +BENCHMARK(BM_BlockWrite)->Apply(BlockSizes)->Name("BlockWrite_float"); +BENCHMARK(BM_BlockWrite)->Apply(BlockSizes)->Name("BlockWrite_double"); +BENCHMARK(BM_TopRows)->Apply(BlockSizes)->Name("TopRows_float"); +BENCHMARK(BM_TopRows)->Apply(BlockSizes)->Name("TopRows_double"); +BENCHMARK(BM_LeftCols)->Apply(BlockSizes)->Name("LeftCols_float"); 
+BENCHMARK(BM_LeftCols)->Apply(BlockSizes)->Name("LeftCols_double"); diff --git a/benchmarks/Core/bench_broadcasting.cpp b/benchmarks/Core/bench_broadcasting.cpp new file mode 100644 index 000000000..f7e3c45a7 --- /dev/null +++ b/benchmarks/Core/bench_broadcasting.cpp @@ -0,0 +1,192 @@ +// Benchmarks for colwise/rowwise reductions and broadcasting operations. +// +// Tests vectorwise reductions (sum, mean, norm, minCoeff, maxCoeff) and +// broadcasting arithmetic (rowwise += vec, colwise -= vec, rowwise *= vec). + +#include +#include + +using namespace Eigen; + +// --- Colwise reductions (reduce each column to a scalar) --- + +template +static void BM_ColwiseSum(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Matrix result(cols); + for (auto _ : state) { + result = m.colwise().sum(); + benchmark::DoNotOptimize(result.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); +} + +template +static void BM_ColwiseMean(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Matrix result(cols); + for (auto _ : state) { + result = m.colwise().mean(); + benchmark::DoNotOptimize(result.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); +} + +template +static void BM_ColwiseNorm(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Matrix result(cols); + for (auto _ : state) { + result = m.colwise().norm(); + benchmark::DoNotOptimize(result.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); +} + +template +static void BM_ColwiseMinCoeff(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using 
Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Matrix result(cols); + for (auto _ : state) { + result = m.colwise().minCoeff(); + benchmark::DoNotOptimize(result.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); +} + +template +static void BM_ColwiseMaxCoeff(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Matrix result(cols); + for (auto _ : state) { + result = m.colwise().maxCoeff(); + benchmark::DoNotOptimize(result.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); +} + +// --- Rowwise reductions (reduce each row to a scalar) --- + +template +static void BM_RowwiseSum(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Matrix result(rows); + for (auto _ : state) { + result = m.rowwise().sum(); + benchmark::DoNotOptimize(result.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); +} + +template +static void BM_RowwiseNorm(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Matrix result(rows); + for (auto _ : state) { + result = m.rowwise().norm(); + benchmark::DoNotOptimize(result.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); +} + +// --- Broadcasting operations --- + +template +static void BM_RowwiseBroadcastAdd(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + using Vec = Matrix; + Mat m = Mat::Random(rows, cols); + Vec v = Vec::Random(cols); + for (auto _ : state) { + m.noalias() = m.rowwise() + v; + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * 
sizeof(Scalar) * 2); +} + +template +static void BM_ColwiseBroadcastAdd(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + using Vec = Matrix; + Mat m = Mat::Random(rows, cols); + Vec v = Vec::Random(rows); + for (auto _ : state) { + m.noalias() = m.colwise() + v; + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2); +} + +template +static void BM_RowwiseBroadcastMul(benchmark::State& state) { + const Index rows = state.range(0); + const Index cols = state.range(1); + using Mat = Matrix; + Mat m = Mat::Random(rows, cols); + Array v = Array::Random(cols); + for (auto _ : state) { + m.array().rowwise() *= v; + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2); +} + +// --- Size configurations --- + +static void BroadcastSizes(::benchmark::Benchmark* b) { + // Square matrices + for (int n : {64, 128, 256, 512, 1024}) b->Args({n, n}); + // Tall-thin (many rows, few cols) + b->Args({10000, 32}); + // Short-wide (few rows, many cols) + b->Args({32, 10000}); +} + +// --- Register: float --- +BENCHMARK(BM_ColwiseSum)->Apply(BroadcastSizes)->Name("ColwiseSum_float"); +BENCHMARK(BM_ColwiseMean)->Apply(BroadcastSizes)->Name("ColwiseMean_float"); +BENCHMARK(BM_ColwiseNorm)->Apply(BroadcastSizes)->Name("ColwiseNorm_float"); +BENCHMARK(BM_ColwiseMinCoeff)->Apply(BroadcastSizes)->Name("ColwiseMinCoeff_float"); +BENCHMARK(BM_ColwiseMaxCoeff)->Apply(BroadcastSizes)->Name("ColwiseMaxCoeff_float"); +BENCHMARK(BM_RowwiseSum)->Apply(BroadcastSizes)->Name("RowwiseSum_float"); +BENCHMARK(BM_RowwiseNorm)->Apply(BroadcastSizes)->Name("RowwiseNorm_float"); +BENCHMARK(BM_RowwiseBroadcastAdd)->Apply(BroadcastSizes)->Name("RowwiseBroadcastAdd_float"); +BENCHMARK(BM_ColwiseBroadcastAdd)->Apply(BroadcastSizes)->Name("ColwiseBroadcastAdd_float"); 
+BENCHMARK(BM_RowwiseBroadcastMul)->Apply(BroadcastSizes)->Name("RowwiseBroadcastMul_float"); + +// --- Register: double --- +BENCHMARK(BM_ColwiseSum)->Apply(BroadcastSizes)->Name("ColwiseSum_double"); +BENCHMARK(BM_ColwiseMean)->Apply(BroadcastSizes)->Name("ColwiseMean_double"); +BENCHMARK(BM_ColwiseNorm)->Apply(BroadcastSizes)->Name("ColwiseNorm_double"); +BENCHMARK(BM_ColwiseMinCoeff)->Apply(BroadcastSizes)->Name("ColwiseMinCoeff_double"); +BENCHMARK(BM_ColwiseMaxCoeff)->Apply(BroadcastSizes)->Name("ColwiseMaxCoeff_double"); +BENCHMARK(BM_RowwiseSum)->Apply(BroadcastSizes)->Name("RowwiseSum_double"); +BENCHMARK(BM_RowwiseNorm)->Apply(BroadcastSizes)->Name("RowwiseNorm_double"); +BENCHMARK(BM_RowwiseBroadcastAdd)->Apply(BroadcastSizes)->Name("RowwiseBroadcastAdd_double"); +BENCHMARK(BM_ColwiseBroadcastAdd)->Apply(BroadcastSizes)->Name("ColwiseBroadcastAdd_double"); +BENCHMARK(BM_RowwiseBroadcastMul)->Apply(BroadcastSizes)->Name("RowwiseBroadcastMul_double"); diff --git a/benchmarks/Core/bench_construction.cpp b/benchmarks/Core/bench_construction.cpp new file mode 100644 index 000000000..6479eb3e2 --- /dev/null +++ b/benchmarks/Core/bench_construction.cpp @@ -0,0 +1,138 @@ +// Benchmarks for matrix initialization / construction. +// +// Tests setZero, setRandom, setIdentity, LinSpaced, Zero(), Constant() +// for both dynamic and small fixed-size matrices. 
+ +#include +#include + +using namespace Eigen; + +// --- Dynamic-size construction --- + +template +static void BM_SetZero(benchmark::State& state) { + const Index n = state.range(0); + Matrix m(n, n); + for (auto _ : state) { + m.setZero(); + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar)); +} + +template +static void BM_SetRandom(benchmark::State& state) { + const Index n = state.range(0); + Matrix m(n, n); + for (auto _ : state) { + m.setRandom(); + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar)); +} + +template +static void BM_SetIdentity(benchmark::State& state) { + const Index n = state.range(0); + Matrix m(n, n); + for (auto _ : state) { + m.setIdentity(); + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar)); +} + +template +static void BM_SetConstant(benchmark::State& state) { + const Index n = state.range(0); + Matrix m(n, n); + for (auto _ : state) { + m.setConstant(Scalar(42)); + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar)); +} + +template +static void BM_LinSpaced(benchmark::State& state) { + const Index n = state.range(0); + Matrix v(n); + for (auto _ : state) { + v = Matrix::LinSpaced(n, Scalar(0), Scalar(1)); + benchmark::DoNotOptimize(v.data()); + } + state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); +} + +// --- Fixed-size construction --- + +template +static void BM_FixedSetZero(benchmark::State& state) { + Matrix m; + for (auto _ : state) { + m.setZero(); + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar)); +} + +template +static void BM_FixedSetRandom(benchmark::State& state) { + Matrix m; + for (auto _ : state) { + m.setRandom(); + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * N * 
N * sizeof(Scalar)); +} + +template +static void BM_FixedSetIdentity(benchmark::State& state) { + Matrix m; + for (auto _ : state) { + m.setIdentity(); + benchmark::DoNotOptimize(m.data()); + } + state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar)); +} + +// --- Size configurations --- + +static void DynamicSizes(::benchmark::Benchmark* b) { + for (int n : {4, 8, 16, 32, 64, 128, 256, 512, 1024}) b->Arg(n); +} + +static void VectorSizes(::benchmark::Benchmark* b) { + for (int n : {64, 256, 1024, 4096, 16384, 65536}) b->Arg(n); +} + +// --- Register: dynamic float --- +BENCHMARK(BM_SetZero)->Apply(DynamicSizes)->Name("SetZero_float"); +BENCHMARK(BM_SetRandom)->Apply(DynamicSizes)->Name("SetRandom_float"); +BENCHMARK(BM_SetIdentity)->Apply(DynamicSizes)->Name("SetIdentity_float"); +BENCHMARK(BM_SetConstant)->Apply(DynamicSizes)->Name("SetConstant_float"); +BENCHMARK(BM_LinSpaced)->Apply(VectorSizes)->Name("LinSpaced_float"); + +// --- Register: dynamic double --- +BENCHMARK(BM_SetZero)->Apply(DynamicSizes)->Name("SetZero_double"); +BENCHMARK(BM_SetRandom)->Apply(DynamicSizes)->Name("SetRandom_double"); +BENCHMARK(BM_SetIdentity)->Apply(DynamicSizes)->Name("SetIdentity_double"); +BENCHMARK(BM_SetConstant)->Apply(DynamicSizes)->Name("SetConstant_double"); +BENCHMARK(BM_LinSpaced)->Apply(VectorSizes)->Name("LinSpaced_double"); + +// --- Register: fixed-size float --- +BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_float_2x2"); +BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_float_3x3"); +BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_float_4x4"); +BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_float_8x8"); +BENCHMARK(BM_FixedSetRandom)->Name("FixedSetRandom_float_4x4"); +BENCHMARK(BM_FixedSetIdentity)->Name("FixedSetIdentity_float_4x4"); + +// --- Register: fixed-size double --- +BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_double_2x2"); +BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_double_3x3"); 
+BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_double_4x4"); +BENCHMARK(BM_FixedSetZero)->Name("FixedSetZero_double_8x8"); +BENCHMARK(BM_FixedSetRandom)->Name("FixedSetRandom_double_4x4"); +BENCHMARK(BM_FixedSetIdentity)->Name("FixedSetIdentity_double_4x4"); diff --git a/benchmarks/Core/bench_cwise_math.cpp b/benchmarks/Core/bench_cwise_math.cpp new file mode 100644 index 000000000..6c2da094e --- /dev/null +++ b/benchmarks/Core/bench_cwise_math.cpp @@ -0,0 +1,120 @@ +// Benchmarks for vectorized coefficient-wise math functions. +// +// Each function is benchmarked on ArrayXf/ArrayXd with inputs chosen to +// stay in the valid domain and avoid NaN/Inf. + +#include +#include +#include + +using namespace Eigen; + +// Macro to define a benchmark for a unary array operation. +// NAME: benchmark function suffix (e.g. Exp) +// EXPR: expression applied to the array (e.g. a.exp()) +// LO, HI: input range [LO, HI] mapped from the default Random() range [-1,1] +#define BENCH_CWISE_UNARY(NAME, EXPR, LO, HI) \ + template \ + static void BM_##NAME(benchmark::State& state) { \ + const Index n = state.range(0); \ + using Arr = Array; \ + /* Map Random [-1,1] to [LO, HI] */ \ + Arr a = (Arr::Random(n) + Scalar(1)) * Scalar((double(HI) - double(LO)) / 2.0) + Scalar(LO); \ + Arr b(n); \ + for (auto _ : state) { \ + b = EXPR; \ + benchmark::DoNotOptimize(b.data()); \ + } \ + state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar) * 2); \ + } + +// Transcendental functions +BENCH_CWISE_UNARY(Exp, a.exp(), -10, 10) +BENCH_CWISE_UNARY(Log, a.log(), 0.01, 100) +BENCH_CWISE_UNARY(Log1p, a.log1p(), -0.5, 100) +BENCH_CWISE_UNARY(Sqrt, a.sqrt(), 0, 100) +BENCH_CWISE_UNARY(Rsqrt, a.rsqrt(), 0.01, 100) + +// Trigonometric functions +BENCH_CWISE_UNARY(Sin, a.sin(), -3.14, 3.14) +BENCH_CWISE_UNARY(Cos, a.cos(), -3.14, 3.14) +BENCH_CWISE_UNARY(Tan, a.tan(), -1.5, 1.5) +BENCH_CWISE_UNARY(Asin, a.asin(), -0.99, 0.99) +BENCH_CWISE_UNARY(Atan, a.atan(), -10, 10) + +// Hyperbolic / special 
+BENCH_CWISE_UNARY(Tanh, a.tanh(), -5, 5) +BENCH_CWISE_UNARY(Erf, Eigen::erf(a), -4, 4) + +// Simple operations (should be very fast / memory-bound) +BENCH_CWISE_UNARY(Abs, a.abs(), -100, 100) +BENCH_CWISE_UNARY(Square, a.square(), -100, 100) +BENCH_CWISE_UNARY(Cube, a.cube(), -10, 10) +BENCH_CWISE_UNARY(Ceil, a.ceil(), -100, 100) +BENCH_CWISE_UNARY(Floor, a.floor(), -100, 100) +BENCH_CWISE_UNARY(Round, a.round(), -100, 100) + +// Sigmoid: 1 / (1 + exp(-x)), common in ML. +BENCH_CWISE_UNARY(Sigmoid, Scalar(1) / (Scalar(1) + (-a).exp()), -10, 10) + +// Power: array^scalar +template +static void BM_Pow(benchmark::State& state) { + const Index n = state.range(0); + using Arr = Array; + Arr a = (Arr::Random(n) + Scalar(1)) * Scalar(50); // [0, 100] + Arr b(n); + for (auto _ : state) { + b = a.pow(Scalar(2.5)); + benchmark::DoNotOptimize(b.data()); + } + state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar) * 2); +} + +static void CwiseSizes(::benchmark::Benchmark* b) { + for (int n : {1024, 4096, 16384, 65536, 262144, 1048576}) b->Arg(n); +} + +// --- Register float --- +BENCHMARK(BM_Exp)->Apply(CwiseSizes)->Name("Exp_float"); +BENCHMARK(BM_Log)->Apply(CwiseSizes)->Name("Log_float"); +BENCHMARK(BM_Log1p)->Apply(CwiseSizes)->Name("Log1p_float"); +BENCHMARK(BM_Sqrt)->Apply(CwiseSizes)->Name("Sqrt_float"); +BENCHMARK(BM_Rsqrt)->Apply(CwiseSizes)->Name("Rsqrt_float"); +BENCHMARK(BM_Sin)->Apply(CwiseSizes)->Name("Sin_float"); +BENCHMARK(BM_Cos)->Apply(CwiseSizes)->Name("Cos_float"); +BENCHMARK(BM_Tan)->Apply(CwiseSizes)->Name("Tan_float"); +BENCHMARK(BM_Asin)->Apply(CwiseSizes)->Name("Asin_float"); +BENCHMARK(BM_Atan)->Apply(CwiseSizes)->Name("Atan_float"); +BENCHMARK(BM_Tanh)->Apply(CwiseSizes)->Name("Tanh_float"); +BENCHMARK(BM_Erf)->Apply(CwiseSizes)->Name("Erf_float"); +BENCHMARK(BM_Abs)->Apply(CwiseSizes)->Name("Abs_float"); +BENCHMARK(BM_Square)->Apply(CwiseSizes)->Name("Square_float"); +BENCHMARK(BM_Cube)->Apply(CwiseSizes)->Name("Cube_float"); 
+BENCHMARK(BM_Ceil)->Apply(CwiseSizes)->Name("Ceil_float"); +BENCHMARK(BM_Floor)->Apply(CwiseSizes)->Name("Floor_float"); +BENCHMARK(BM_Round)->Apply(CwiseSizes)->Name("Round_float"); +BENCHMARK(BM_Sigmoid)->Apply(CwiseSizes)->Name("Sigmoid_float"); +BENCHMARK(BM_Pow)->Apply(CwiseSizes)->Name("Pow_float"); + +// --- Register double --- +BENCHMARK(BM_Exp)->Apply(CwiseSizes)->Name("Exp_double"); +BENCHMARK(BM_Log)->Apply(CwiseSizes)->Name("Log_double"); +BENCHMARK(BM_Log1p)->Apply(CwiseSizes)->Name("Log1p_double"); +BENCHMARK(BM_Sqrt)->Apply(CwiseSizes)->Name("Sqrt_double"); +BENCHMARK(BM_Rsqrt)->Apply(CwiseSizes)->Name("Rsqrt_double"); +BENCHMARK(BM_Sin)->Apply(CwiseSizes)->Name("Sin_double"); +BENCHMARK(BM_Cos)->Apply(CwiseSizes)->Name("Cos_double"); +BENCHMARK(BM_Tan)->Apply(CwiseSizes)->Name("Tan_double"); +BENCHMARK(BM_Asin)->Apply(CwiseSizes)->Name("Asin_double"); +BENCHMARK(BM_Atan)->Apply(CwiseSizes)->Name("Atan_double"); +BENCHMARK(BM_Tanh)->Apply(CwiseSizes)->Name("Tanh_double"); +BENCHMARK(BM_Erf)->Apply(CwiseSizes)->Name("Erf_double"); +BENCHMARK(BM_Abs)->Apply(CwiseSizes)->Name("Abs_double"); +BENCHMARK(BM_Square)->Apply(CwiseSizes)->Name("Square_double"); +BENCHMARK(BM_Cube)->Apply(CwiseSizes)->Name("Cube_double"); +BENCHMARK(BM_Ceil)->Apply(CwiseSizes)->Name("Ceil_double"); +BENCHMARK(BM_Floor)->Apply(CwiseSizes)->Name("Floor_double"); +BENCHMARK(BM_Round)->Apply(CwiseSizes)->Name("Round_double"); +BENCHMARK(BM_Sigmoid)->Apply(CwiseSizes)->Name("Sigmoid_double"); +BENCHMARK(BM_Pow)->Apply(CwiseSizes)->Name("Pow_double"); diff --git a/benchmarks/Core/bench_diagonal.cpp b/benchmarks/Core/bench_diagonal.cpp new file mode 100644 index 000000000..5019d7384 --- /dev/null +++ b/benchmarks/Core/bench_diagonal.cpp @@ -0,0 +1,81 @@ +// Benchmarks for diagonal operations. +// +// Tests diagonal extraction, diagonal-matrix product, and matrix-diagonal product. + +#include +#include + +using namespace Eigen; + +// Extract diagonal from a square matrix and sum it. 
+template +static void BM_DiagonalExtract(benchmark::State& state) { + const Index n = state.range(0); + using Mat = Matrix; + Mat A = Mat::Random(n, n); + for (auto _ : state) { + Scalar s = A.diagonal().sum(); + benchmark::DoNotOptimize(s); + } + state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); +} + +// y = diag(d) * x (diagonal matrix times vector). +template +static void BM_DiagonalTimesVector(benchmark::State& state) { + const Index n = state.range(0); + using Vec = Matrix; + Vec d = Vec::Random(n); + Vec x = Vec::Random(n); + Vec y(n); + for (auto _ : state) { + y = d.asDiagonal() * x; + benchmark::DoNotOptimize(y.data()); + } + state.SetBytesProcessed(state.iterations() * 3 * n * sizeof(Scalar)); +} + +// C = diag(d) * A (diagonal matrix times dense matrix). +template +static void BM_DiagonalTimesMatrix(benchmark::State& state) { + const Index n = state.range(0); + using Mat = Matrix; + using Vec = Matrix; + Vec d = Vec::Random(n); + Mat A = Mat::Random(n, n); + Mat C(n, n); + for (auto _ : state) { + C.noalias() = d.asDiagonal() * A; + benchmark::DoNotOptimize(C.data()); + } + state.SetBytesProcessed(state.iterations() * 2 * n * n * sizeof(Scalar)); +} + +// C = A * diag(d) (dense matrix times diagonal matrix). 
+template +static void BM_MatrixTimesDiagonal(benchmark::State& state) { + const Index n = state.range(0); + using Mat = Matrix; + using Vec = Matrix; + Vec d = Vec::Random(n); + Mat A = Mat::Random(n, n); + Mat C(n, n); + for (auto _ : state) { + C.noalias() = A * d.asDiagonal(); + benchmark::DoNotOptimize(C.data()); + } + state.SetBytesProcessed(state.iterations() * 2 * n * n * sizeof(Scalar)); +} + +static void Sizes(::benchmark::Benchmark* b) { + for (int n : {32, 64, 128, 256, 512, 1024}) b->Arg(n); +} + +BENCHMARK(BM_DiagonalExtract)->Apply(Sizes)->Name("DiagonalExtract_float"); +BENCHMARK(BM_DiagonalExtract)->Apply(Sizes)->Name("DiagonalExtract_double"); +BENCHMARK(BM_DiagonalTimesVector)->Apply(Sizes)->Name("DiagonalTimesVector_float"); +BENCHMARK(BM_DiagonalTimesVector)->Apply(Sizes)->Name("DiagonalTimesVector_double"); +BENCHMARK(BM_DiagonalTimesMatrix)->Apply(Sizes)->Name("DiagonalTimesMatrix_float"); +BENCHMARK(BM_DiagonalTimesMatrix)->Apply(Sizes)->Name("DiagonalTimesMatrix_double"); +BENCHMARK(BM_MatrixTimesDiagonal)->Apply(Sizes)->Name("MatrixTimesDiagonal_float"); +BENCHMARK(BM_MatrixTimesDiagonal)->Apply(Sizes)->Name("MatrixTimesDiagonal_double"); diff --git a/benchmarks/Core/bench_dot.cpp b/benchmarks/Core/bench_dot.cpp new file mode 100644 index 000000000..a03416f8b --- /dev/null +++ b/benchmarks/Core/bench_dot.cpp @@ -0,0 +1,51 @@ +// Benchmarks for dot product (BLAS-1 critical path). +// +// Flop count: 2n for real, 8n for complex. + +#include +#include + +using namespace Eigen; + +template +double dotFlops(Index n) { + return (NumTraits::IsComplex ? 
8.0 : 2.0) * n; +} + +template +static void BM_Dot(benchmark::State& state) { + const Index n = state.range(0); + using Vec = Matrix; + Vec a = Vec::Random(n); + Vec b = Vec::Random(n); + for (auto _ : state) { + Scalar d = a.dot(b); + benchmark::DoNotOptimize(d); + } + state.counters["GFLOPS"] = benchmark::Counter(dotFlops(n), benchmark::Counter::kIsIterationInvariantRate, + benchmark::Counter::kIs1000); +} + +template +static void BM_SquaredNorm(benchmark::State& state) { + const Index n = state.range(0); + using Vec = Matrix; + Vec a = Vec::Random(n); + for (auto _ : state) { + auto d = a.squaredNorm(); + benchmark::DoNotOptimize(d); + } + state.counters["GFLOPS"] = benchmark::Counter(dotFlops(n), benchmark::Counter::kIsIterationInvariantRate, + benchmark::Counter::kIs1000); +} + +static void DotSizes(::benchmark::Benchmark* b) { + for (int n : {64, 256, 1024, 4096, 16384, 65536, 262144, 1048576}) b->Arg(n); +} + +BENCHMARK(BM_Dot)->Apply(DotSizes)->Name("Dot_float"); +BENCHMARK(BM_Dot)->Apply(DotSizes)->Name("Dot_double"); +BENCHMARK(BM_Dot>)->Apply(DotSizes)->Name("Dot_cfloat"); +BENCHMARK(BM_Dot>)->Apply(DotSizes)->Name("Dot_cdouble"); +BENCHMARK(BM_SquaredNorm)->Apply(DotSizes)->Name("SquaredNorm_float"); +BENCHMARK(BM_SquaredNorm)->Apply(DotSizes)->Name("SquaredNorm_double"); diff --git a/benchmarks/Core/bench_map.cpp b/benchmarks/Core/bench_map.cpp new file mode 100644 index 000000000..1b8bfccea --- /dev/null +++ b/benchmarks/Core/bench_map.cpp @@ -0,0 +1,103 @@ +// Benchmarks for Map and Ref with various strides. +// +// Compares contiguous Map vs strided Map vs owned matrix for basic +// operations (GEMV and vector sum). + +#include +#include + +using namespace Eigen; + +// Sum a contiguous Map. 
+template +static void BM_MapContiguousSum(benchmark::State& state) { + const Index n = state.range(0); + std::vector buf(n); + Map> v(buf.data(), n); + v.setRandom(); + for (auto _ : state) { + Scalar s = v.sum(); + benchmark::DoNotOptimize(s); + } + state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); +} + +// Sum a strided Map (InnerStride). +template +static void BM_MapStridedSum(benchmark::State& state) { + const Index n = state.range(0); + const Index stride = 3; + std::vector buf(n * stride); + Map, 0, InnerStride<>> v(buf.data(), n, InnerStride<>(stride)); + v.setRandom(); + for (auto _ : state) { + Scalar s = v.sum(); + benchmark::DoNotOptimize(s); + } + state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); +} + +// Sum an owned VectorX (baseline). +template +static void BM_OwnedSum(benchmark::State& state) { + const Index n = state.range(0); + Matrix v = Matrix::Random(n); + for (auto _ : state) { + Scalar s = v.sum(); + benchmark::DoNotOptimize(s); + } + state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); +} + +// GEMV through contiguous Map. +template +static void BM_MapGemv(benchmark::State& state) { + const Index n = state.range(0); + std::vector buf(n * n); + Map> A(buf.data(), n, n); + A.setRandom(); + Matrix x = Matrix::Random(n); + Matrix y = Matrix::Random(n); + for (auto _ : state) { + y.noalias() += A * x; + benchmark::DoNotOptimize(y.data()); + benchmark::ClobberMemory(); + } + state.counters["GFLOPS"] = + benchmark::Counter(2.0 * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000); +} + +// GEMV with owned matrix (baseline). 
+template +static void BM_OwnedGemv(benchmark::State& state) { + const Index n = state.range(0); + Matrix A = Matrix::Random(n, n); + Matrix x = Matrix::Random(n); + Matrix y = Matrix::Random(n); + for (auto _ : state) { + y.noalias() += A * x; + benchmark::DoNotOptimize(y.data()); + benchmark::ClobberMemory(); + } + state.counters["GFLOPS"] = + benchmark::Counter(2.0 * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000); +} + +static void SumSizes(::benchmark::Benchmark* b) { + for (int n : {256, 1024, 4096, 16384, 65536, 262144, 1048576}) b->Arg(n); +} + +static void GemvSizes(::benchmark::Benchmark* b) { + for (int n : {32, 128, 512, 1024}) b->Arg(n); +} + +BENCHMARK(BM_MapContiguousSum)->Apply(SumSizes)->Name("MapContiguousSum_float"); +BENCHMARK(BM_MapStridedSum)->Apply(SumSizes)->Name("MapStridedSum_float"); +BENCHMARK(BM_OwnedSum)->Apply(SumSizes)->Name("OwnedSum_float"); +BENCHMARK(BM_MapContiguousSum)->Apply(SumSizes)->Name("MapContiguousSum_double"); +BENCHMARK(BM_MapStridedSum)->Apply(SumSizes)->Name("MapStridedSum_double"); +BENCHMARK(BM_OwnedSum)->Apply(SumSizes)->Name("OwnedSum_double"); +BENCHMARK(BM_MapGemv)->Apply(GemvSizes)->Name("MapGemv_float"); +BENCHMARK(BM_OwnedGemv)->Apply(GemvSizes)->Name("OwnedGemv_float"); +BENCHMARK(BM_MapGemv)->Apply(GemvSizes)->Name("MapGemv_double"); +BENCHMARK(BM_OwnedGemv)->Apply(GemvSizes)->Name("OwnedGemv_double"); diff --git a/benchmarks/Core/bench_reductions.cpp b/benchmarks/Core/bench_reductions.cpp new file mode 100644 index 000000000..049bf8e1a --- /dev/null +++ b/benchmarks/Core/bench_reductions.cpp @@ -0,0 +1,173 @@ +// Benchmarks for full reductions: sum, prod, minCoeff, maxCoeff, mean, +// norm, squaredNorm, lpNorm<1>, lpNorm. +// +// These are memory-bandwidth-bound for large vectors, so we report +// bytes processed rather than FLOPS. 
+
+#include <benchmark/benchmark.h>
+#include <Eigen/Core>
+
+using namespace Eigen;
+
+// --- Vector reductions (1-D) ---
+
+template <typename Scalar>
+static void BM_VectorSum(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar s = v.sum();
+    benchmark::DoNotOptimize(s);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorProd(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Constant(n, Scalar(1));
+  // Use values near 1 to avoid overflow/underflow.
+  v += Scalar(0.001) * Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar p = v.prod();
+    benchmark::DoNotOptimize(p);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorMinCoeff(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar m = v.minCoeff();
+    benchmark::DoNotOptimize(m);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorMaxCoeff(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar m = v.maxCoeff();
+    benchmark::DoNotOptimize(m);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorMean(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar m = v.mean();
+    benchmark::DoNotOptimize(m);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorSquaredNorm(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar s = v.squaredNorm();
+    benchmark::DoNotOptimize(s);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorNorm(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar s = v.norm();
+    benchmark::DoNotOptimize(s);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorLpNorm1(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar s = v.template lpNorm<1>();
+    benchmark::DoNotOptimize(s);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_VectorLpNormInf(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
+  for (auto _ : state) {
+    Scalar s = v.template lpNorm<Infinity>();
+    benchmark::DoNotOptimize(s);
+  }
+  state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
+}
+
+// --- Matrix reductions (2-D) ---
+
+template <typename Scalar>
+static void BM_MatrixSum(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, Dynamic> m = Matrix<Scalar, Dynamic, Dynamic>::Random(n, n);
+  for (auto _ : state) {
+    Scalar s = m.sum();
+    benchmark::DoNotOptimize(s);
+  }
+  state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
+}
+
+template <typename Scalar>
+static void BM_MatrixNorm(benchmark::State& state) {
+  const Index n = state.range(0);
+  Matrix<Scalar, Dynamic, Dynamic> m = Matrix<Scalar, Dynamic, Dynamic>::Random(n, n);
+  for (auto _ : state) {
+    Scalar s = m.norm();
+    benchmark::DoNotOptimize(s);
+  }
+  state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
+}
+
+// --- Size configurations ---
+
+static void VectorSizes(::benchmark::Benchmark* b) {
+  for (int n : {64, 256, 1024, 4096, 16384, 65536, 262144, 1048576}) b->Arg(n);
+}
+
+static void MatrixSizes(::benchmark::Benchmark* b) {
+  for (int n : {8, 32, 64, 128, 256, 512, 1024}) b->Arg(n);
+}
+
+// --- Register: float ---
+BENCHMARK(BM_VectorSum<float>)->Apply(VectorSizes)->Name("VectorSum_float");
+BENCHMARK(BM_VectorProd<float>)->Apply(VectorSizes)->Name("VectorProd_float");
+BENCHMARK(BM_VectorMinCoeff<float>)->Apply(VectorSizes)->Name("VectorMinCoeff_float");
+BENCHMARK(BM_VectorMaxCoeff<float>)->Apply(VectorSizes)->Name("VectorMaxCoeff_float");
+BENCHMARK(BM_VectorMean<float>)->Apply(VectorSizes)->Name("VectorMean_float");
+BENCHMARK(BM_VectorSquaredNorm<float>)->Apply(VectorSizes)->Name("VectorSquaredNorm_float");
+BENCHMARK(BM_VectorNorm<float>)->Apply(VectorSizes)->Name("VectorNorm_float");
+BENCHMARK(BM_VectorLpNorm1<float>)->Apply(VectorSizes)->Name("VectorLpNorm1_float");
+BENCHMARK(BM_VectorLpNormInf<float>)->Apply(VectorSizes)->Name("VectorLpNormInf_float");
+BENCHMARK(BM_MatrixSum<float>)->Apply(MatrixSizes)->Name("MatrixSum_float");
+BENCHMARK(BM_MatrixNorm<float>)->Apply(MatrixSizes)->Name("MatrixNorm_float");
+
+// --- Register: double ---
+BENCHMARK(BM_VectorSum<double>)->Apply(VectorSizes)->Name("VectorSum_double");
+BENCHMARK(BM_VectorProd<double>)->Apply(VectorSizes)->Name("VectorProd_double");
+BENCHMARK(BM_VectorMinCoeff<double>)->Apply(VectorSizes)->Name("VectorMinCoeff_double");
+BENCHMARK(BM_VectorMaxCoeff<double>)->Apply(VectorSizes)->Name("VectorMaxCoeff_double");
+BENCHMARK(BM_VectorMean<double>)->Apply(VectorSizes)->Name("VectorMean_double");
+BENCHMARK(BM_VectorSquaredNorm<double>)->Apply(VectorSizes)->Name("VectorSquaredNorm_double");
+BENCHMARK(BM_VectorNorm<double>)->Apply(VectorSizes)->Name("VectorNorm_double");
+BENCHMARK(BM_VectorLpNorm1<double>)->Apply(VectorSizes)->Name("VectorLpNorm1_double");
+BENCHMARK(BM_VectorLpNormInf<double>)->Apply(VectorSizes)->Name("VectorLpNormInf_double");
+BENCHMARK(BM_MatrixSum<double>)->Apply(MatrixSizes)->Name("MatrixSum_double");
+BENCHMARK(BM_MatrixNorm<double>)->Apply(MatrixSizes)->Name("MatrixNorm_double");
diff --git a/benchmarks/Core/bench_selfadjoint_product.cpp b/benchmarks/Core/bench_selfadjoint_product.cpp
new file mode 100644
index 000000000..ec17d0a19
--- /dev/null
+++ b/benchmarks/Core/bench_selfadjoint_product.cpp
@@ -0,0 +1,78 @@
+// Benchmarks for self-adjoint (symmetric/hermitian) matrix operations.
+//
+// Tests SYMM (selfadjointView * dense) and rank-k updates.
+
+#include <benchmark/benchmark.h>
+#include <Eigen/Core>
+
+using namespace Eigen;
+
+// C = selfadjointView(A) * B (SYMM)
+template <typename Scalar>
+static void BM_SYMM_Left(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  A = (A + A.transpose()).eval() / Scalar(2);
+  Mat B = Mat::Random(n, n);
+  Mat C(n, n);
+  for (auto _ : state) {
+    C.noalias() = A.template selfadjointView<Lower>() * B;
+    benchmark::DoNotOptimize(C.data());
+  }
+  state.counters["GFLOPS"] =
+      benchmark::Counter(2.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
+}
+
+// C = B * selfadjointView(A)
+template <typename Scalar>
+static void BM_SYMM_Right(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  A = (A + A.transpose()).eval() / Scalar(2);
+  Mat B = Mat::Random(n, n);
+  Mat C(n, n);
+  for (auto _ : state) {
+    C.noalias() = B * A.template selfadjointView<Lower>();
+    benchmark::DoNotOptimize(C.data());
+  }
+  state.counters["GFLOPS"] =
+      benchmark::Counter(2.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
+}
+
+// Rank-k update: C.selfadjointView<Lower>().rankUpdate(A)
+// Computes C += A * A^T
+template <typename Scalar>
+static void BM_RankUpdate(benchmark::State& state) {
+  const Index n = state.range(0);
+  const Index k = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, k);
+  Mat C = Mat::Zero(n, n);
+  for (auto _ : state) {
+    C.template selfadjointView<Lower>().rankUpdate(A);
+    benchmark::DoNotOptimize(C.data());
+  }
+  state.counters["GFLOPS"] =
+      benchmark::Counter(1.0 * n * n * k, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
+}
+
+static void SymmSizes(::benchmark::Benchmark* b) {
+  for (int n : {64, 128, 256, 512, 1024}) b->Arg(n);
+}
+
+static void RankUpdateSizes(::benchmark::Benchmark* b) {
+  for (int n : {64, 128, 256, 512}) {
+    for (int k : {16, 64, 256}) {
+      b->Args({n, k});
+    }
+  }
+}
+
+BENCHMARK(BM_SYMM_Left<float>)->Apply(SymmSizes)->Name("SYMM_Left_float");
+BENCHMARK(BM_SYMM_Left<double>)->Apply(SymmSizes)->Name("SYMM_Left_double");
+BENCHMARK(BM_SYMM_Right<float>)->Apply(SymmSizes)->Name("SYMM_Right_float");
+BENCHMARK(BM_SYMM_Right<double>)->Apply(SymmSizes)->Name("SYMM_Right_double");
+BENCHMARK(BM_RankUpdate<float>)->Apply(RankUpdateSizes)->Name("RankUpdate_float");
+BENCHMARK(BM_RankUpdate<double>)->Apply(RankUpdateSizes)->Name("RankUpdate_double");
diff --git a/benchmarks/Core/bench_triangular_product.cpp b/benchmarks/Core/bench_triangular_product.cpp
new file mode 100644
index 000000000..95d515ce6
--- /dev/null
+++ b/benchmarks/Core/bench_triangular_product.cpp
@@ -0,0 +1,56 @@
+// Benchmarks for triangular-dense matrix products (TRMM).
+//
+// Tests C = triangular(A) * B for various modes (Lower/Upper) and sides (Left/Right).
+
+#include <benchmark/benchmark.h>
+#include <Eigen/Core>
+
+using namespace Eigen;
+
+// C = triangularView(A) * B
+template <typename Scalar, unsigned int Mode>
+static void BM_TRMM_Left(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  Mat B = Mat::Random(n, n);
+  Mat C(n, n);
+  for (auto _ : state) {
+    C.noalias() = A.template triangularView<Mode>() * B;
+    benchmark::DoNotOptimize(C.data());
+  }
+  state.counters["GFLOPS"] =
+      benchmark::Counter(1.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
+}
+
+// C = B * triangularView(A)
+template <typename Scalar, unsigned int Mode>
+static void BM_TRMM_Right(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  Mat B = Mat::Random(n, n);
+  Mat C(n, n);
+  for (auto _ : state) {
+    C.noalias() = B * A.template triangularView<Mode>();
+    benchmark::DoNotOptimize(C.data());
+  }
+  state.counters["GFLOPS"] =
+      benchmark::Counter(1.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
+}
+
+static void TrmmSizes(::benchmark::Benchmark* b) {
+  for (int n : {64, 128, 256, 512, 1024}) b->Arg(n);
+}
+
+// Left product
+BENCHMARK(BM_TRMM_Left<float, Lower>)->Apply(TrmmSizes)->Name("TRMM_Left_float_Lower");
+BENCHMARK(BM_TRMM_Left<float, Upper>)->Apply(TrmmSizes)->Name("TRMM_Left_float_Upper");
+BENCHMARK(BM_TRMM_Left<double, Lower>)->Apply(TrmmSizes)->Name("TRMM_Left_double_Lower");
+BENCHMARK(BM_TRMM_Left<double, Upper>)->Apply(TrmmSizes)->Name("TRMM_Left_double_Upper");
+
+// Right product
+BENCHMARK(BM_TRMM_Right<float, Lower>)->Apply(TrmmSizes)->Name("TRMM_Right_float_Lower");
+BENCHMARK(BM_TRMM_Right<float, Upper>)->Apply(TrmmSizes)->Name("TRMM_Right_float_Upper");
+BENCHMARK(BM_TRMM_Right<double, Lower>)->Apply(TrmmSizes)->Name("TRMM_Right_double_Lower");
+BENCHMARK(BM_TRMM_Right<double, Upper>)->Apply(TrmmSizes)->Name("TRMM_Right_double_Upper");
diff --git a/benchmarks/LU/CMakeLists.txt b/benchmarks/LU/CMakeLists.txt
index aa4e0fbf3..e28693382 100644
--- a/benchmarks/LU/CMakeLists.txt
+++ b/benchmarks/LU/CMakeLists.txt
@@ -1 +1 @@
-# LU benchmarks will be added here.
+eigen_add_benchmark(bench_lu bench_lu.cpp)
diff --git a/benchmarks/LU/bench_lu.cpp b/benchmarks/LU/bench_lu.cpp
new file mode 100644
index 000000000..f783cc916
--- /dev/null
+++ b/benchmarks/LU/bench_lu.cpp
@@ -0,0 +1,139 @@
+// Benchmarks for LU decompositions.
+//
+// Tests PartialPivLU and FullPivLU: compute, solve, inverse, determinant.
+
+#include <benchmark/benchmark.h>
+#include <Eigen/LU>
+
+using namespace Eigen;
+
+typedef Matrix<float, Dynamic, Dynamic> Matf;
+typedef Matrix<double, Dynamic, Dynamic> Matd;
+
+// --- PartialPivLU ---
+
+template <typename Scalar>
+EIGEN_DONT_INLINE void do_compute(PartialPivLU<Matrix<Scalar, Dynamic, Dynamic>>& lu,
+                                  const Matrix<Scalar, Dynamic, Dynamic>& A) {
+  lu.compute(A);
+}
+
+template <typename Scalar>
+static void BM_PartialPivLU_Compute(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  PartialPivLU<Mat> lu(n);
+  for (auto _ : state) {
+    do_compute(lu, A);
+    benchmark::DoNotOptimize(lu.matrixLU().data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+template <typename Scalar>
+static void BM_PartialPivLU_Solve(benchmark::State& state) {
+  const Index n = state.range(0);
+  const Index nrhs = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  Mat B = Mat::Random(n, nrhs);
+  PartialPivLU<Mat> lu(A);
+  Mat X(n, nrhs);
+  for (auto _ : state) {
+    X = lu.solve(B);
+    benchmark::DoNotOptimize(X.data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+template <typename Scalar>
+static void BM_PartialPivLU_Inverse(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  PartialPivLU<Mat> lu(A);
+  Mat inv(n, n);
+  for (auto _ : state) {
+    inv = lu.inverse();
+    benchmark::DoNotOptimize(inv.data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+template <typename Scalar>
+static void BM_PartialPivLU_Determinant(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  PartialPivLU<Mat> lu(A);
+  for (auto _ : state) {
+    Scalar d = lu.determinant();
+    benchmark::DoNotOptimize(d);
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+// --- FullPivLU ---
+
+template <typename Scalar>
+EIGEN_DONT_INLINE void do_compute(FullPivLU<Matrix<Scalar, Dynamic, Dynamic>>& lu,
+                                  const Matrix<Scalar, Dynamic, Dynamic>& A) {
+  lu.compute(A);
+}
+
+template <typename Scalar>
+static void BM_FullPivLU_Compute(benchmark::State& state) {
+  const Index n = state.range(0);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  FullPivLU<Mat> lu(n, n);
+  for (auto _ : state) {
+    do_compute(lu, A);
+    benchmark::DoNotOptimize(lu.matrixLU().data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+template <typename Scalar>
+static void BM_FullPivLU_Solve(benchmark::State& state) {
+  const Index n = state.range(0);
+  const Index nrhs = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(n, n);
+  Mat B = Mat::Random(n, nrhs);
+  FullPivLU<Mat> lu(A);
+  Mat X(n, nrhs);
+  for (auto _ : state) {
+    X = lu.solve(B);
+    benchmark::DoNotOptimize(X.data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+// --- Size configurations ---
+
+static void SquareSizes(::benchmark::Benchmark* b) {
+  for (int n : {8, 32, 64, 128, 256, 512, 1024}) b->Arg(n);
+}
+
+static void SolveSizes(::benchmark::Benchmark* b) {
+  for (int n : {32, 128, 512, 1024}) {
+    for (int nrhs : {1, 16, 64}) {
+      b->Args({n, nrhs});
+    }
+  }
+}
+
+BENCHMARK(BM_PartialPivLU_Compute<float>)->Apply(SquareSizes)->Name("PartialPivLU_Compute_float");
+BENCHMARK(BM_PartialPivLU_Compute<double>)->Apply(SquareSizes)->Name("PartialPivLU_Compute_double");
+BENCHMARK(BM_PartialPivLU_Solve<float>)->Apply(SolveSizes)->Name("PartialPivLU_Solve_float");
+BENCHMARK(BM_PartialPivLU_Solve<double>)->Apply(SolveSizes)->Name("PartialPivLU_Solve_double");
+BENCHMARK(BM_PartialPivLU_Inverse<float>)->Apply(SquareSizes)->Name("PartialPivLU_Inverse_float");
+BENCHMARK(BM_PartialPivLU_Inverse<double>)->Apply(SquareSizes)->Name("PartialPivLU_Inverse_double");
+BENCHMARK(BM_PartialPivLU_Determinant<float>)->Apply(SquareSizes)->Name("PartialPivLU_Determinant_float");
+BENCHMARK(BM_PartialPivLU_Determinant<double>)->Apply(SquareSizes)->Name("PartialPivLU_Determinant_double");
+BENCHMARK(BM_FullPivLU_Compute<float>)->Apply(SquareSizes)->Name("FullPivLU_Compute_float");
+BENCHMARK(BM_FullPivLU_Compute<double>)->Apply(SquareSizes)->Name("FullPivLU_Compute_double");
+BENCHMARK(BM_FullPivLU_Solve<float>)->Apply(SolveSizes)->Name("FullPivLU_Solve_float");
+BENCHMARK(BM_FullPivLU_Solve<double>)->Apply(SolveSizes)->Name("FullPivLU_Solve_double");
diff --git a/benchmarks/QR/CMakeLists.txt b/benchmarks/QR/CMakeLists.txt
index 2a0bc0c7d..2c90f4c41 100644
--- a/benchmarks/QR/CMakeLists.txt
+++ b/benchmarks/QR/CMakeLists.txt
@@ -1 +1 @@
-# QR benchmarks will be added here.
+eigen_add_benchmark(bench_qr bench_qr.cpp)
diff --git a/benchmarks/QR/bench_qr.cpp b/benchmarks/QR/bench_qr.cpp
new file mode 100644
index 000000000..e4b57e962
--- /dev/null
+++ b/benchmarks/QR/bench_qr.cpp
@@ -0,0 +1,123 @@
+// Benchmarks for QR decompositions.
+//
+// Tests HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR, and COD.
+// Both square and tall-thin matrix shapes are tested.
+
+#include <benchmark/benchmark.h>
+#include <Eigen/QR>
+
+using namespace Eigen;
+
+template <typename QR>
+EIGEN_DONT_INLINE void do_compute(QR& qr, const typename QR::MatrixType& A) {
+  qr.compute(A);
+}
+
+// --- HouseholderQR ---
+
+template <typename Scalar>
+static void BM_HouseholderQR(benchmark::State& state) {
+  const Index rows = state.range(0);
+  const Index cols = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(rows, cols);
+  HouseholderQR<Mat> qr(rows, cols);
+  for (auto _ : state) {
+    do_compute(qr, A);
+    benchmark::DoNotOptimize(qr.matrixQR().data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+// --- ColPivHouseholderQR ---
+
+template <typename Scalar>
+static void BM_ColPivHouseholderQR(benchmark::State& state) {
+  const Index rows = state.range(0);
+  const Index cols = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(rows, cols);
+  ColPivHouseholderQR<Mat> qr(rows, cols);
+  for (auto _ : state) {
+    do_compute(qr, A);
+    benchmark::DoNotOptimize(qr.matrixQR().data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+// --- FullPivHouseholderQR ---
+
+template <typename Scalar>
+static void BM_FullPivHouseholderQR(benchmark::State& state) {
+  const Index rows = state.range(0);
+  const Index cols = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(rows, cols);
+  FullPivHouseholderQR<Mat> qr(rows, cols);
+  for (auto _ : state) {
+    do_compute(qr, A);
+    benchmark::DoNotOptimize(qr.matrixQR().data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+// --- CompleteOrthogonalDecomposition (COD) ---
+
+template <typename Scalar>
+static void BM_COD(benchmark::State& state) {
+  const Index rows = state.range(0);
+  const Index cols = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  Mat A = Mat::Random(rows, cols);
+  CompleteOrthogonalDecomposition<Mat> cod(rows, cols);
+  for (auto _ : state) {
+    do_compute(cod, A);
+    benchmark::DoNotOptimize(cod.matrixQTZ().data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+// --- QR solve ---
+
+template <typename Scalar>
+static void BM_HouseholderQR_Solve(benchmark::State& state) {
+  const Index rows = state.range(0);
+  const Index cols = state.range(1);
+  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
+  using Vec = Matrix<Scalar, Dynamic, 1>;
+  Mat A = Mat::Random(rows, cols);
+  Vec b = Vec::Random(rows);
+  HouseholderQR<Mat> qr(A);
+  Vec x(cols);
+  for (auto _ : state) {
+    x = qr.solve(b);
+    benchmark::DoNotOptimize(x.data());
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+// --- Size configurations ---
+
+static void QrSizes(::benchmark::Benchmark* b) {
+  // Square
+  for (int n : {32, 64, 128, 256, 512, 1024}) b->Args({n, n});
+  // Tall-thin
+  b->Args({1000, 32});
+  b->Args({1000, 100});
+  b->Args({10000, 32});
+  b->Args({10000, 100});
+}
+
+// Register: float
+BENCHMARK(BM_HouseholderQR<float>)->Apply(QrSizes)->Name("HouseholderQR_float");
+BENCHMARK(BM_ColPivHouseholderQR<float>)->Apply(QrSizes)->Name("ColPivHouseholderQR_float");
+BENCHMARK(BM_FullPivHouseholderQR<float>)->Apply(QrSizes)->Name("FullPivHouseholderQR_float");
+BENCHMARK(BM_COD<float>)->Apply(QrSizes)->Name("COD_float");
+BENCHMARK(BM_HouseholderQR_Solve<float>)->Apply(QrSizes)->Name("HouseholderQR_Solve_float");
+
+// Register: double
+BENCHMARK(BM_HouseholderQR<double>)->Apply(QrSizes)->Name("HouseholderQR_double");
+BENCHMARK(BM_ColPivHouseholderQR<double>)->Apply(QrSizes)->Name("ColPivHouseholderQR_double");
+BENCHMARK(BM_FullPivHouseholderQR<double>)->Apply(QrSizes)->Name("FullPivHouseholderQR_double");
+BENCHMARK(BM_COD<double>)->Apply(QrSizes)->Name("COD_double");
+BENCHMARK(BM_HouseholderQR_Solve<double>)->Apply(QrSizes)->Name("HouseholderQR_Solve_double");