mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Add new benchmarks for Core, LU, and QR operations
libeigen/eigen!2177 Closes #3035 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -5,3 +5,13 @@ eigen_add_benchmark(bench_vecadd bench_vecadd.cpp)
|
||||
eigen_add_benchmark(bench_trsm bench_trsm.cpp)
|
||||
eigen_add_benchmark(bench_reverse bench_reverse.cpp)
|
||||
eigen_add_benchmark(bench_move_semantics bench_move_semantics.cpp)
|
||||
eigen_add_benchmark(bench_reductions bench_reductions.cpp)
|
||||
eigen_add_benchmark(bench_dot bench_dot.cpp)
|
||||
eigen_add_benchmark(bench_cwise_math bench_cwise_math.cpp)
|
||||
eigen_add_benchmark(bench_broadcasting bench_broadcasting.cpp)
|
||||
eigen_add_benchmark(bench_block_ops bench_block_ops.cpp)
|
||||
eigen_add_benchmark(bench_map bench_map.cpp)
|
||||
eigen_add_benchmark(bench_diagonal bench_diagonal.cpp)
|
||||
eigen_add_benchmark(bench_triangular_product bench_triangular_product.cpp)
|
||||
eigen_add_benchmark(bench_selfadjoint_product bench_selfadjoint_product.cpp)
|
||||
eigen_add_benchmark(bench_construction bench_construction.cpp)
|
||||
|
||||
89
benchmarks/Core/bench_block_ops.cpp
Normal file
89
benchmarks/Core/bench_block_ops.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
// Benchmarks for block extraction and assignment operations.
|
||||
//
|
||||
// Tests sub-matrix views: block(), topRows(), leftCols(), middleCols().
|
||||
// Measures expression template overhead for read and write patterns.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Read a block and assign to a separate matrix (forces evaluation).
|
||||
template <typename Scalar>
|
||||
static void BM_BlockRead(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index block_size = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat src = Mat::Random(n, n);
|
||||
Mat dst(block_size, block_size);
|
||||
const Index off = (n - block_size) / 2;
|
||||
for (auto _ : state) {
|
||||
dst = src.block(off, off, block_size, block_size);
|
||||
benchmark::DoNotOptimize(dst.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * block_size * block_size * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// Write into a block of a larger matrix.
|
||||
template <typename Scalar>
|
||||
static void BM_BlockWrite(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index block_size = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat dst = Mat::Random(n, n);
|
||||
Mat src = Mat::Random(block_size, block_size);
|
||||
const Index off = (n - block_size) / 2;
|
||||
for (auto _ : state) {
|
||||
dst.block(off, off, block_size, block_size) = src;
|
||||
benchmark::DoNotOptimize(dst.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * block_size * block_size * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// topRows extraction.
|
||||
template <typename Scalar>
|
||||
static void BM_TopRows(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index k = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat src = Mat::Random(n, n);
|
||||
Mat dst(k, n);
|
||||
for (auto _ : state) {
|
||||
dst = src.topRows(k);
|
||||
benchmark::DoNotOptimize(dst.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * k * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// leftCols extraction.
|
||||
template <typename Scalar>
|
||||
static void BM_LeftCols(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index k = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat src = Mat::Random(n, n);
|
||||
Mat dst(n, k);
|
||||
for (auto _ : state) {
|
||||
dst = src.leftCols(k);
|
||||
benchmark::DoNotOptimize(dst.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * k * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// Argument pairs (matrix_size, block_size); block sizes larger than the
// matrix are skipped.
static void BlockSizes(::benchmark::Benchmark* b) {
  for (int mat : {256, 512, 1024}) {
    for (int blk : {16, 64, 128}) {
      if (blk <= mat) {
        b->Args({mat, blk});
      }
    }
  }
}
|
||||
|
||||
// Register float and double instantiations of every block benchmark.
BENCHMARK(BM_BlockRead<float>)->Apply(BlockSizes)->Name("BlockRead_float");
BENCHMARK(BM_BlockRead<double>)->Apply(BlockSizes)->Name("BlockRead_double");
BENCHMARK(BM_BlockWrite<float>)->Apply(BlockSizes)->Name("BlockWrite_float");
BENCHMARK(BM_BlockWrite<double>)->Apply(BlockSizes)->Name("BlockWrite_double");
BENCHMARK(BM_TopRows<float>)->Apply(BlockSizes)->Name("TopRows_float");
BENCHMARK(BM_TopRows<double>)->Apply(BlockSizes)->Name("TopRows_double");
BENCHMARK(BM_LeftCols<float>)->Apply(BlockSizes)->Name("LeftCols_float");
BENCHMARK(BM_LeftCols<double>)->Apply(BlockSizes)->Name("LeftCols_double");
|
||||
192
benchmarks/Core/bench_broadcasting.cpp
Normal file
192
benchmarks/Core/bench_broadcasting.cpp
Normal file
@@ -0,0 +1,192 @@
|
||||
// Benchmarks for colwise/rowwise reductions and broadcasting operations.
|
||||
//
|
||||
// Tests vectorwise reductions (sum, mean, norm, minCoeff, maxCoeff) and
|
||||
// broadcasting arithmetic (rowwise += vec, colwise -= vec, rowwise *= vec).
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// --- Colwise reductions (reduce each column to a scalar) ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_ColwiseSum(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Matrix<Scalar, 1, Dynamic> result(cols);
|
||||
for (auto _ : state) {
|
||||
result = m.colwise().sum();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_ColwiseMean(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Matrix<Scalar, 1, Dynamic> result(cols);
|
||||
for (auto _ : state) {
|
||||
result = m.colwise().mean();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_ColwiseNorm(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Matrix<Scalar, 1, Dynamic> result(cols);
|
||||
for (auto _ : state) {
|
||||
result = m.colwise().norm();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_ColwiseMinCoeff(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Matrix<Scalar, 1, Dynamic> result(cols);
|
||||
for (auto _ : state) {
|
||||
result = m.colwise().minCoeff();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_ColwiseMaxCoeff(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Matrix<Scalar, 1, Dynamic> result(cols);
|
||||
for (auto _ : state) {
|
||||
result = m.colwise().maxCoeff();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// --- Rowwise reductions (reduce each row to a scalar) ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_RowwiseSum(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Matrix<Scalar, Dynamic, 1> result(rows);
|
||||
for (auto _ : state) {
|
||||
result = m.rowwise().sum();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_RowwiseNorm(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Matrix<Scalar, Dynamic, 1> result(rows);
|
||||
for (auto _ : state) {
|
||||
result = m.rowwise().norm();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// --- Broadcasting operations ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_RowwiseBroadcastAdd(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, 1, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Vec v = Vec::Random(cols);
|
||||
for (auto _ : state) {
|
||||
m.noalias() = m.rowwise() + v;
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_ColwiseBroadcastAdd(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Vec v = Vec::Random(rows);
|
||||
for (auto _ : state) {
|
||||
m.noalias() = m.colwise() + v;
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_RowwiseBroadcastMul(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat m = Mat::Random(rows, cols);
|
||||
Array<Scalar, 1, Dynamic> v = Array<Scalar, 1, Dynamic>::Random(cols);
|
||||
for (auto _ : state) {
|
||||
m.array().rowwise() *= v;
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2);
|
||||
}
|
||||
|
||||
// --- Size configurations ---
|
||||
|
||||
// Size grid: square matrices plus extreme tall-thin and short-wide shapes,
// which stress the two broadcast directions differently.
static void BroadcastSizes(::benchmark::Benchmark* b) {
  for (int n : {64, 128, 256, 512, 1024}) {
    b->Args({n, n});  // square
  }
  b->Args({10000, 32});  // tall-thin: many rows, few cols
  b->Args({32, 10000});  // short-wide: few rows, many cols
}
|
||||
|
||||
// --- Register: float ---
|
||||
BENCHMARK(BM_ColwiseSum<float>)->Apply(BroadcastSizes)->Name("ColwiseSum_float");
|
||||
BENCHMARK(BM_ColwiseMean<float>)->Apply(BroadcastSizes)->Name("ColwiseMean_float");
|
||||
BENCHMARK(BM_ColwiseNorm<float>)->Apply(BroadcastSizes)->Name("ColwiseNorm_float");
|
||||
BENCHMARK(BM_ColwiseMinCoeff<float>)->Apply(BroadcastSizes)->Name("ColwiseMinCoeff_float");
|
||||
BENCHMARK(BM_ColwiseMaxCoeff<float>)->Apply(BroadcastSizes)->Name("ColwiseMaxCoeff_float");
|
||||
BENCHMARK(BM_RowwiseSum<float>)->Apply(BroadcastSizes)->Name("RowwiseSum_float");
|
||||
BENCHMARK(BM_RowwiseNorm<float>)->Apply(BroadcastSizes)->Name("RowwiseNorm_float");
|
||||
BENCHMARK(BM_RowwiseBroadcastAdd<float>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastAdd_float");
|
||||
BENCHMARK(BM_ColwiseBroadcastAdd<float>)->Apply(BroadcastSizes)->Name("ColwiseBroadcastAdd_float");
|
||||
BENCHMARK(BM_RowwiseBroadcastMul<float>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastMul_float");
|
||||
|
||||
// --- Register: double ---
|
||||
BENCHMARK(BM_ColwiseSum<double>)->Apply(BroadcastSizes)->Name("ColwiseSum_double");
|
||||
BENCHMARK(BM_ColwiseMean<double>)->Apply(BroadcastSizes)->Name("ColwiseMean_double");
|
||||
BENCHMARK(BM_ColwiseNorm<double>)->Apply(BroadcastSizes)->Name("ColwiseNorm_double");
|
||||
BENCHMARK(BM_ColwiseMinCoeff<double>)->Apply(BroadcastSizes)->Name("ColwiseMinCoeff_double");
|
||||
BENCHMARK(BM_ColwiseMaxCoeff<double>)->Apply(BroadcastSizes)->Name("ColwiseMaxCoeff_double");
|
||||
BENCHMARK(BM_RowwiseSum<double>)->Apply(BroadcastSizes)->Name("RowwiseSum_double");
|
||||
BENCHMARK(BM_RowwiseNorm<double>)->Apply(BroadcastSizes)->Name("RowwiseNorm_double");
|
||||
BENCHMARK(BM_RowwiseBroadcastAdd<double>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastAdd_double");
|
||||
BENCHMARK(BM_ColwiseBroadcastAdd<double>)->Apply(BroadcastSizes)->Name("ColwiseBroadcastAdd_double");
|
||||
BENCHMARK(BM_RowwiseBroadcastMul<double>)->Apply(BroadcastSizes)->Name("RowwiseBroadcastMul_double");
|
||||
138
benchmarks/Core/bench_construction.cpp
Normal file
138
benchmarks/Core/bench_construction.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
// Benchmarks for matrix initialization / construction.
|
||||
//
|
||||
// Tests setZero, setRandom, setIdentity, LinSpaced, Zero(), Constant()
|
||||
// for both dynamic and small fixed-size matrices.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// --- Dynamic-size construction ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_SetZero(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, Dynamic> m(n, n);
|
||||
for (auto _ : state) {
|
||||
m.setZero();
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_SetRandom(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, Dynamic> m(n, n);
|
||||
for (auto _ : state) {
|
||||
m.setRandom();
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_SetIdentity(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, Dynamic> m(n, n);
|
||||
for (auto _ : state) {
|
||||
m.setIdentity();
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_SetConstant(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, Dynamic> m(n, n);
|
||||
for (auto _ : state) {
|
||||
m.setConstant(Scalar(42));
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_LinSpaced(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v(n);
|
||||
for (auto _ : state) {
|
||||
v = Matrix<Scalar, Dynamic, 1>::LinSpaced(n, Scalar(0), Scalar(1));
|
||||
benchmark::DoNotOptimize(v.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// --- Fixed-size construction ---
|
||||
|
||||
template <typename Scalar, int N>
|
||||
static void BM_FixedSetZero(benchmark::State& state) {
|
||||
Matrix<Scalar, N, N> m;
|
||||
for (auto _ : state) {
|
||||
m.setZero();
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar, int N>
|
||||
static void BM_FixedSetRandom(benchmark::State& state) {
|
||||
Matrix<Scalar, N, N> m;
|
||||
for (auto _ : state) {
|
||||
m.setRandom();
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar, int N>
|
||||
static void BM_FixedSetIdentity(benchmark::State& state) {
|
||||
Matrix<Scalar, N, N> m;
|
||||
for (auto _ : state) {
|
||||
m.setIdentity();
|
||||
benchmark::DoNotOptimize(m.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * N * N * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// --- Size configurations ---
|
||||
|
||||
// Square matrix sizes spanning tiny through cache-busting.
static void DynamicSizes(::benchmark::Benchmark* b) {
  for (int n : {4, 8, 16, 32, 64, 128, 256, 512, 1024}) {
    b->Arg(n);
  }
}
|
||||
|
||||
// Vector lengths for 1-D generators.
static void VectorSizes(::benchmark::Benchmark* b) {
  for (int n : {64, 256, 1024, 4096, 16384, 65536}) {
    b->Arg(n);
  }
}
|
||||
|
||||
// --- Register: dynamic float ---
|
||||
BENCHMARK(BM_SetZero<float>)->Apply(DynamicSizes)->Name("SetZero_float");
|
||||
BENCHMARK(BM_SetRandom<float>)->Apply(DynamicSizes)->Name("SetRandom_float");
|
||||
BENCHMARK(BM_SetIdentity<float>)->Apply(DynamicSizes)->Name("SetIdentity_float");
|
||||
BENCHMARK(BM_SetConstant<float>)->Apply(DynamicSizes)->Name("SetConstant_float");
|
||||
BENCHMARK(BM_LinSpaced<float>)->Apply(VectorSizes)->Name("LinSpaced_float");
|
||||
|
||||
// --- Register: dynamic double ---
|
||||
BENCHMARK(BM_SetZero<double>)->Apply(DynamicSizes)->Name("SetZero_double");
|
||||
BENCHMARK(BM_SetRandom<double>)->Apply(DynamicSizes)->Name("SetRandom_double");
|
||||
BENCHMARK(BM_SetIdentity<double>)->Apply(DynamicSizes)->Name("SetIdentity_double");
|
||||
BENCHMARK(BM_SetConstant<double>)->Apply(DynamicSizes)->Name("SetConstant_double");
|
||||
BENCHMARK(BM_LinSpaced<double>)->Apply(VectorSizes)->Name("LinSpaced_double");
|
||||
|
||||
// --- Register: fixed-size float ---
|
||||
BENCHMARK(BM_FixedSetZero<float, 2>)->Name("FixedSetZero_float_2x2");
|
||||
BENCHMARK(BM_FixedSetZero<float, 3>)->Name("FixedSetZero_float_3x3");
|
||||
BENCHMARK(BM_FixedSetZero<float, 4>)->Name("FixedSetZero_float_4x4");
|
||||
BENCHMARK(BM_FixedSetZero<float, 8>)->Name("FixedSetZero_float_8x8");
|
||||
BENCHMARK(BM_FixedSetRandom<float, 4>)->Name("FixedSetRandom_float_4x4");
|
||||
BENCHMARK(BM_FixedSetIdentity<float, 4>)->Name("FixedSetIdentity_float_4x4");
|
||||
|
||||
// --- Register: fixed-size double ---
|
||||
BENCHMARK(BM_FixedSetZero<double, 2>)->Name("FixedSetZero_double_2x2");
|
||||
BENCHMARK(BM_FixedSetZero<double, 3>)->Name("FixedSetZero_double_3x3");
|
||||
BENCHMARK(BM_FixedSetZero<double, 4>)->Name("FixedSetZero_double_4x4");
|
||||
BENCHMARK(BM_FixedSetZero<double, 8>)->Name("FixedSetZero_double_8x8");
|
||||
BENCHMARK(BM_FixedSetRandom<double, 4>)->Name("FixedSetRandom_double_4x4");
|
||||
BENCHMARK(BM_FixedSetIdentity<double, 4>)->Name("FixedSetIdentity_double_4x4");
|
||||
120
benchmarks/Core/bench_cwise_math.cpp
Normal file
120
benchmarks/Core/bench_cwise_math.cpp
Normal file
@@ -0,0 +1,120 @@
|
||||
// Benchmarks for vectorized coefficient-wise math functions.
|
||||
//
|
||||
// Each function is benchmarked on ArrayXf/ArrayXd with inputs chosen to
|
||||
// stay in the valid domain and avoid NaN/Inf.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <unsupported/Eigen/SpecialFunctions>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Defines one benchmark for a unary array expression.
//   NAME   - suffix of the generated BM_ function (e.g. Exp -> BM_Exp)
//   EXPR   - expression evaluated on the input array `a` (e.g. a.exp())
//   LO, HI - input domain; Random()'s [-1,1] output is remapped to [LO, HI]
#define BENCH_CWISE_UNARY(NAME, EXPR, LO, HI)                                                     \
  template <typename Scalar>                                                                      \
  static void BM_##NAME(benchmark::State& state) {                                                \
    const Index n = state.range(0);                                                               \
    using Arr = Array<Scalar, Dynamic, 1>;                                                        \
    /* Affine remap of Random's [-1,1] onto [LO, HI]. */                                          \
    Arr a = (Arr::Random(n) + Scalar(1)) * Scalar((double(HI) - double(LO)) / 2.0) + Scalar(LO);  \
    Arr out(n);                                                                                   \
    for (auto _ : state) {                                                                        \
      out = EXPR;                                                                                 \
      benchmark::DoNotOptimize(out.data());                                                       \
    }                                                                                             \
    /* One element read plus one written per coefficient. */                                      \
    state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar) * 2);                         \
  }
|
||||
|
||||
// Transcendental functions
|
||||
BENCH_CWISE_UNARY(Exp, a.exp(), -10, 10)
|
||||
BENCH_CWISE_UNARY(Log, a.log(), 0.01, 100)
|
||||
BENCH_CWISE_UNARY(Log1p, a.log1p(), -0.5, 100)
|
||||
BENCH_CWISE_UNARY(Sqrt, a.sqrt(), 0, 100)
|
||||
BENCH_CWISE_UNARY(Rsqrt, a.rsqrt(), 0.01, 100)
|
||||
|
||||
// Trigonometric functions
|
||||
BENCH_CWISE_UNARY(Sin, a.sin(), -3.14, 3.14)
|
||||
BENCH_CWISE_UNARY(Cos, a.cos(), -3.14, 3.14)
|
||||
BENCH_CWISE_UNARY(Tan, a.tan(), -1.5, 1.5)
|
||||
BENCH_CWISE_UNARY(Asin, a.asin(), -0.99, 0.99)
|
||||
BENCH_CWISE_UNARY(Atan, a.atan(), -10, 10)
|
||||
|
||||
// Hyperbolic / special
|
||||
BENCH_CWISE_UNARY(Tanh, a.tanh(), -5, 5)
|
||||
BENCH_CWISE_UNARY(Erf, Eigen::erf(a), -4, 4)
|
||||
|
||||
// Simple operations (should be very fast / memory-bound)
|
||||
BENCH_CWISE_UNARY(Abs, a.abs(), -100, 100)
|
||||
BENCH_CWISE_UNARY(Square, a.square(), -100, 100)
|
||||
BENCH_CWISE_UNARY(Cube, a.cube(), -10, 10)
|
||||
BENCH_CWISE_UNARY(Ceil, a.ceil(), -100, 100)
|
||||
BENCH_CWISE_UNARY(Floor, a.floor(), -100, 100)
|
||||
BENCH_CWISE_UNARY(Round, a.round(), -100, 100)
|
||||
|
||||
// Sigmoid: 1 / (1 + exp(-x)), common in ML.
|
||||
BENCH_CWISE_UNARY(Sigmoid, Scalar(1) / (Scalar(1) + (-a).exp()), -10, 10)
|
||||
|
||||
// Power: array^scalar
|
||||
template <typename Scalar>
|
||||
static void BM_Pow(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Arr = Array<Scalar, Dynamic, 1>;
|
||||
Arr a = (Arr::Random(n) + Scalar(1)) * Scalar(50); // [0, 100]
|
||||
Arr b(n);
|
||||
for (auto _ : state) {
|
||||
b = a.pow(Scalar(2.5));
|
||||
benchmark::DoNotOptimize(b.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar) * 2);
|
||||
}
|
||||
|
||||
// Array lengths from L1-resident up to well past last-level cache.
static void CwiseSizes(::benchmark::Benchmark* b) {
  for (int n : {1024, 4096, 16384, 65536, 262144, 1048576}) {
    b->Arg(n);
  }
}
|
||||
|
||||
// --- Register float ---
|
||||
BENCHMARK(BM_Exp<float>)->Apply(CwiseSizes)->Name("Exp_float");
|
||||
BENCHMARK(BM_Log<float>)->Apply(CwiseSizes)->Name("Log_float");
|
||||
BENCHMARK(BM_Log1p<float>)->Apply(CwiseSizes)->Name("Log1p_float");
|
||||
BENCHMARK(BM_Sqrt<float>)->Apply(CwiseSizes)->Name("Sqrt_float");
|
||||
BENCHMARK(BM_Rsqrt<float>)->Apply(CwiseSizes)->Name("Rsqrt_float");
|
||||
BENCHMARK(BM_Sin<float>)->Apply(CwiseSizes)->Name("Sin_float");
|
||||
BENCHMARK(BM_Cos<float>)->Apply(CwiseSizes)->Name("Cos_float");
|
||||
BENCHMARK(BM_Tan<float>)->Apply(CwiseSizes)->Name("Tan_float");
|
||||
BENCHMARK(BM_Asin<float>)->Apply(CwiseSizes)->Name("Asin_float");
|
||||
BENCHMARK(BM_Atan<float>)->Apply(CwiseSizes)->Name("Atan_float");
|
||||
BENCHMARK(BM_Tanh<float>)->Apply(CwiseSizes)->Name("Tanh_float");
|
||||
BENCHMARK(BM_Erf<float>)->Apply(CwiseSizes)->Name("Erf_float");
|
||||
BENCHMARK(BM_Abs<float>)->Apply(CwiseSizes)->Name("Abs_float");
|
||||
BENCHMARK(BM_Square<float>)->Apply(CwiseSizes)->Name("Square_float");
|
||||
BENCHMARK(BM_Cube<float>)->Apply(CwiseSizes)->Name("Cube_float");
|
||||
BENCHMARK(BM_Ceil<float>)->Apply(CwiseSizes)->Name("Ceil_float");
|
||||
BENCHMARK(BM_Floor<float>)->Apply(CwiseSizes)->Name("Floor_float");
|
||||
BENCHMARK(BM_Round<float>)->Apply(CwiseSizes)->Name("Round_float");
|
||||
BENCHMARK(BM_Sigmoid<float>)->Apply(CwiseSizes)->Name("Sigmoid_float");
|
||||
BENCHMARK(BM_Pow<float>)->Apply(CwiseSizes)->Name("Pow_float");
|
||||
|
||||
// --- Register double ---
|
||||
BENCHMARK(BM_Exp<double>)->Apply(CwiseSizes)->Name("Exp_double");
|
||||
BENCHMARK(BM_Log<double>)->Apply(CwiseSizes)->Name("Log_double");
|
||||
BENCHMARK(BM_Log1p<double>)->Apply(CwiseSizes)->Name("Log1p_double");
|
||||
BENCHMARK(BM_Sqrt<double>)->Apply(CwiseSizes)->Name("Sqrt_double");
|
||||
BENCHMARK(BM_Rsqrt<double>)->Apply(CwiseSizes)->Name("Rsqrt_double");
|
||||
BENCHMARK(BM_Sin<double>)->Apply(CwiseSizes)->Name("Sin_double");
|
||||
BENCHMARK(BM_Cos<double>)->Apply(CwiseSizes)->Name("Cos_double");
|
||||
BENCHMARK(BM_Tan<double>)->Apply(CwiseSizes)->Name("Tan_double");
|
||||
BENCHMARK(BM_Asin<double>)->Apply(CwiseSizes)->Name("Asin_double");
|
||||
BENCHMARK(BM_Atan<double>)->Apply(CwiseSizes)->Name("Atan_double");
|
||||
BENCHMARK(BM_Tanh<double>)->Apply(CwiseSizes)->Name("Tanh_double");
|
||||
BENCHMARK(BM_Erf<double>)->Apply(CwiseSizes)->Name("Erf_double");
|
||||
BENCHMARK(BM_Abs<double>)->Apply(CwiseSizes)->Name("Abs_double");
|
||||
BENCHMARK(BM_Square<double>)->Apply(CwiseSizes)->Name("Square_double");
|
||||
BENCHMARK(BM_Cube<double>)->Apply(CwiseSizes)->Name("Cube_double");
|
||||
BENCHMARK(BM_Ceil<double>)->Apply(CwiseSizes)->Name("Ceil_double");
|
||||
BENCHMARK(BM_Floor<double>)->Apply(CwiseSizes)->Name("Floor_double");
|
||||
BENCHMARK(BM_Round<double>)->Apply(CwiseSizes)->Name("Round_double");
|
||||
BENCHMARK(BM_Sigmoid<double>)->Apply(CwiseSizes)->Name("Sigmoid_double");
|
||||
BENCHMARK(BM_Pow<double>)->Apply(CwiseSizes)->Name("Pow_double");
|
||||
81
benchmarks/Core/bench_diagonal.cpp
Normal file
81
benchmarks/Core/bench_diagonal.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
// Benchmarks for diagonal operations.
|
||||
//
|
||||
// Tests diagonal extraction, diagonal-matrix product, and matrix-diagonal product.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Extract diagonal from a square matrix and sum it.
|
||||
template <typename Scalar>
|
||||
static void BM_DiagonalExtract(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = A.diagonal().sum();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// y = diag(d) * x (diagonal matrix times vector).
|
||||
template <typename Scalar>
|
||||
static void BM_DiagonalTimesVector(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
Vec d = Vec::Random(n);
|
||||
Vec x = Vec::Random(n);
|
||||
Vec y(n);
|
||||
for (auto _ : state) {
|
||||
y = d.asDiagonal() * x;
|
||||
benchmark::DoNotOptimize(y.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * 3 * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// C = diag(d) * A (diagonal matrix times dense matrix).
|
||||
template <typename Scalar>
|
||||
static void BM_DiagonalTimesMatrix(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
Vec d = Vec::Random(n);
|
||||
Mat A = Mat::Random(n, n);
|
||||
Mat C(n, n);
|
||||
for (auto _ : state) {
|
||||
C.noalias() = d.asDiagonal() * A;
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * 2 * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// C = A * diag(d) (dense matrix times diagonal matrix).
|
||||
template <typename Scalar>
|
||||
static void BM_MatrixTimesDiagonal(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
Vec d = Vec::Random(n);
|
||||
Mat A = Mat::Random(n, n);
|
||||
Mat C(n, n);
|
||||
for (auto _ : state) {
|
||||
C.noalias() = A * d.asDiagonal();
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * 2 * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// Square matrix sizes shared by all diagonal benchmarks.
static void Sizes(::benchmark::Benchmark* b) {
  for (int n : {32, 64, 128, 256, 512, 1024}) {
    b->Arg(n);
  }
}
|
||||
|
||||
// Register float and double instantiations of every diagonal benchmark.
BENCHMARK(BM_DiagonalExtract<float>)->Apply(Sizes)->Name("DiagonalExtract_float");
BENCHMARK(BM_DiagonalExtract<double>)->Apply(Sizes)->Name("DiagonalExtract_double");
BENCHMARK(BM_DiagonalTimesVector<float>)->Apply(Sizes)->Name("DiagonalTimesVector_float");
BENCHMARK(BM_DiagonalTimesVector<double>)->Apply(Sizes)->Name("DiagonalTimesVector_double");
BENCHMARK(BM_DiagonalTimesMatrix<float>)->Apply(Sizes)->Name("DiagonalTimesMatrix_float");
BENCHMARK(BM_DiagonalTimesMatrix<double>)->Apply(Sizes)->Name("DiagonalTimesMatrix_double");
BENCHMARK(BM_MatrixTimesDiagonal<float>)->Apply(Sizes)->Name("MatrixTimesDiagonal_float");
BENCHMARK(BM_MatrixTimesDiagonal<double>)->Apply(Sizes)->Name("MatrixTimesDiagonal_double");
|
||||
51
benchmarks/Core/bench_dot.cpp
Normal file
51
benchmarks/Core/bench_dot.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
// Benchmarks for dot product (BLAS-1 critical path).
|
||||
//
|
||||
// Flop count: 2n for real, 8n for complex.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
template <typename Scalar>
|
||||
double dotFlops(Index n) {
|
||||
return (NumTraits<Scalar>::IsComplex ? 8.0 : 2.0) * n;
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_Dot(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
Vec a = Vec::Random(n);
|
||||
Vec b = Vec::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar d = a.dot(b);
|
||||
benchmark::DoNotOptimize(d);
|
||||
}
|
||||
state.counters["GFLOPS"] = benchmark::Counter(dotFlops<Scalar>(n), benchmark::Counter::kIsIterationInvariantRate,
|
||||
benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_SquaredNorm(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
Vec a = Vec::Random(n);
|
||||
for (auto _ : state) {
|
||||
auto d = a.squaredNorm();
|
||||
benchmark::DoNotOptimize(d);
|
||||
}
|
||||
state.counters["GFLOPS"] = benchmark::Counter(dotFlops<Scalar>(n), benchmark::Counter::kIsIterationInvariantRate,
|
||||
benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// Vector lengths from L1-resident up to well past last-level cache.
static void DotSizes(::benchmark::Benchmark* b) {
  for (int n : {64, 256, 1024, 4096, 16384, 65536, 262144, 1048576}) {
    b->Arg(n);
  }
}
|
||||
|
||||
// Dot is registered for real and complex scalars; squaredNorm for real only.
BENCHMARK(BM_Dot<float>)->Apply(DotSizes)->Name("Dot_float");
BENCHMARK(BM_Dot<double>)->Apply(DotSizes)->Name("Dot_double");
BENCHMARK(BM_Dot<std::complex<float>>)->Apply(DotSizes)->Name("Dot_cfloat");
BENCHMARK(BM_Dot<std::complex<double>>)->Apply(DotSizes)->Name("Dot_cdouble");
BENCHMARK(BM_SquaredNorm<float>)->Apply(DotSizes)->Name("SquaredNorm_float");
BENCHMARK(BM_SquaredNorm<double>)->Apply(DotSizes)->Name("SquaredNorm_double");
|
||||
103
benchmarks/Core/bench_map.cpp
Normal file
103
benchmarks/Core/bench_map.cpp
Normal file
@@ -0,0 +1,103 @@
|
||||
// Benchmarks for Map and Ref with various strides.
|
||||
//
|
||||
// Compares contiguous Map vs strided Map vs owned matrix for basic
|
||||
// operations (GEMV and vector sum).
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Sum a contiguous Map<VectorX>.
|
||||
template <typename Scalar>
|
||||
static void BM_MapContiguousSum(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
std::vector<Scalar> buf(n);
|
||||
Map<Matrix<Scalar, Dynamic, 1>> v(buf.data(), n);
|
||||
v.setRandom();
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.sum();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// Sum a strided Map (InnerStride).
|
||||
template <typename Scalar>
|
||||
static void BM_MapStridedSum(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index stride = 3;
|
||||
std::vector<Scalar> buf(n * stride);
|
||||
Map<Matrix<Scalar, Dynamic, 1>, 0, InnerStride<>> v(buf.data(), n, InnerStride<>(stride));
|
||||
v.setRandom();
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.sum();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// Sum an owned VectorX (baseline).
|
||||
template <typename Scalar>
|
||||
static void BM_OwnedSum(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.sum();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// GEMV through contiguous Map<MatrixX>.
|
||||
template <typename Scalar>
|
||||
static void BM_MapGemv(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
std::vector<Scalar> buf(n * n);
|
||||
Map<Matrix<Scalar, Dynamic, Dynamic>> A(buf.data(), n, n);
|
||||
A.setRandom();
|
||||
Matrix<Scalar, Dynamic, 1> x = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
Matrix<Scalar, Dynamic, 1> y = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
y.noalias() += A * x;
|
||||
benchmark::DoNotOptimize(y.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(2.0 * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// GEMV with owned matrix (baseline).
|
||||
template <typename Scalar>
|
||||
static void BM_OwnedGemv(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, Dynamic> A = Matrix<Scalar, Dynamic, Dynamic>::Random(n, n);
|
||||
Matrix<Scalar, Dynamic, 1> x = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
Matrix<Scalar, Dynamic, 1> y = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
y.noalias() += A * x;
|
||||
benchmark::DoNotOptimize(y.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(2.0 * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// Vector lengths for the sum benchmarks (cache-resident up to DRAM-bound).
static void SumSizes(::benchmark::Benchmark* b) {
  for (int n : {256, 1024, 4096, 16384, 65536, 262144, 1048576}) b->Arg(n);
}

// Square matrix sizes for the GEMV benchmarks.
static void GemvSizes(::benchmark::Benchmark* b) {
  for (int n : {32, 128, 512, 1024}) b->Arg(n);
}

// Registrations: each Map variant is paired with its owned-storage baseline.
BENCHMARK(BM_MapContiguousSum<float>)->Apply(SumSizes)->Name("MapContiguousSum_float");
BENCHMARK(BM_MapStridedSum<float>)->Apply(SumSizes)->Name("MapStridedSum_float");
BENCHMARK(BM_OwnedSum<float>)->Apply(SumSizes)->Name("OwnedSum_float");
BENCHMARK(BM_MapContiguousSum<double>)->Apply(SumSizes)->Name("MapContiguousSum_double");
BENCHMARK(BM_MapStridedSum<double>)->Apply(SumSizes)->Name("MapStridedSum_double");
BENCHMARK(BM_OwnedSum<double>)->Apply(SumSizes)->Name("OwnedSum_double");
BENCHMARK(BM_MapGemv<float>)->Apply(GemvSizes)->Name("MapGemv_float");
BENCHMARK(BM_OwnedGemv<float>)->Apply(GemvSizes)->Name("OwnedGemv_float");
BENCHMARK(BM_MapGemv<double>)->Apply(GemvSizes)->Name("MapGemv_double");
BENCHMARK(BM_OwnedGemv<double>)->Apply(GemvSizes)->Name("OwnedGemv_double");
|
||||
173
benchmarks/Core/bench_reductions.cpp
Normal file
173
benchmarks/Core/bench_reductions.cpp
Normal file
@@ -0,0 +1,173 @@
|
||||
// Benchmarks for full reductions: sum, prod, minCoeff, maxCoeff, mean,
|
||||
// norm, squaredNorm, lpNorm<1>, lpNorm<Infinity>.
|
||||
//
|
||||
// These are memory-bandwidth-bound for large vectors, so we report
|
||||
// bytes processed rather than FLOPS.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// --- Vector reductions (1-D) ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorSum(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.sum();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorProd(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Constant(n, Scalar(1));
|
||||
// Use values near 1 to avoid overflow/underflow.
|
||||
v += Scalar(0.001) * Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar p = v.prod();
|
||||
benchmark::DoNotOptimize(p);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorMinCoeff(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar m = v.minCoeff();
|
||||
benchmark::DoNotOptimize(m);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorMaxCoeff(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar m = v.maxCoeff();
|
||||
benchmark::DoNotOptimize(m);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorMean(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar m = v.mean();
|
||||
benchmark::DoNotOptimize(m);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorSquaredNorm(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.squaredNorm();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorNorm(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.norm();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorLpNorm1(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.template lpNorm<1>();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_VectorLpNormInf(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, 1> v = Matrix<Scalar, Dynamic, 1>::Random(n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = v.template lpNorm<Infinity>();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// --- Matrix reductions (2-D) ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_MatrixSum(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, Dynamic> m = Matrix<Scalar, Dynamic, Dynamic>::Random(n, n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = m.sum();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_MatrixNorm(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
Matrix<Scalar, Dynamic, Dynamic> m = Matrix<Scalar, Dynamic, Dynamic>::Random(n, n);
|
||||
for (auto _ : state) {
|
||||
Scalar s = m.norm();
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar));
|
||||
}
|
||||
|
||||
// --- Size configurations ---
|
||||
|
||||
// Vector lengths: powers of four from 64 up to ~1M elements.
static void VectorSizes(::benchmark::Benchmark* b) {
  for (int n : {64, 256, 1024, 4096, 16384, 65536, 262144, 1048576}) b->Arg(n);
}

// Square matrix sizes for the 2-D reductions.
static void MatrixSizes(::benchmark::Benchmark* b) {
  for (int n : {8, 32, 64, 128, 256, 512, 1024}) b->Arg(n);
}

// --- Register: float ---
BENCHMARK(BM_VectorSum<float>)->Apply(VectorSizes)->Name("VectorSum_float");
BENCHMARK(BM_VectorProd<float>)->Apply(VectorSizes)->Name("VectorProd_float");
BENCHMARK(BM_VectorMinCoeff<float>)->Apply(VectorSizes)->Name("VectorMinCoeff_float");
BENCHMARK(BM_VectorMaxCoeff<float>)->Apply(VectorSizes)->Name("VectorMaxCoeff_float");
BENCHMARK(BM_VectorMean<float>)->Apply(VectorSizes)->Name("VectorMean_float");
BENCHMARK(BM_VectorSquaredNorm<float>)->Apply(VectorSizes)->Name("VectorSquaredNorm_float");
BENCHMARK(BM_VectorNorm<float>)->Apply(VectorSizes)->Name("VectorNorm_float");
BENCHMARK(BM_VectorLpNorm1<float>)->Apply(VectorSizes)->Name("VectorLpNorm1_float");
BENCHMARK(BM_VectorLpNormInf<float>)->Apply(VectorSizes)->Name("VectorLpNormInf_float");
BENCHMARK(BM_MatrixSum<float>)->Apply(MatrixSizes)->Name("MatrixSum_float");
BENCHMARK(BM_MatrixNorm<float>)->Apply(MatrixSizes)->Name("MatrixNorm_float");

// --- Register: double ---
BENCHMARK(BM_VectorSum<double>)->Apply(VectorSizes)->Name("VectorSum_double");
BENCHMARK(BM_VectorProd<double>)->Apply(VectorSizes)->Name("VectorProd_double");
BENCHMARK(BM_VectorMinCoeff<double>)->Apply(VectorSizes)->Name("VectorMinCoeff_double");
BENCHMARK(BM_VectorMaxCoeff<double>)->Apply(VectorSizes)->Name("VectorMaxCoeff_double");
BENCHMARK(BM_VectorMean<double>)->Apply(VectorSizes)->Name("VectorMean_double");
BENCHMARK(BM_VectorSquaredNorm<double>)->Apply(VectorSizes)->Name("VectorSquaredNorm_double");
BENCHMARK(BM_VectorNorm<double>)->Apply(VectorSizes)->Name("VectorNorm_double");
BENCHMARK(BM_VectorLpNorm1<double>)->Apply(VectorSizes)->Name("VectorLpNorm1_double");
BENCHMARK(BM_VectorLpNormInf<double>)->Apply(VectorSizes)->Name("VectorLpNormInf_double");
BENCHMARK(BM_MatrixSum<double>)->Apply(MatrixSizes)->Name("MatrixSum_double");
BENCHMARK(BM_MatrixNorm<double>)->Apply(MatrixSizes)->Name("MatrixNorm_double");
|
||||
78
benchmarks/Core/bench_selfadjoint_product.cpp
Normal file
78
benchmarks/Core/bench_selfadjoint_product.cpp
Normal file
@@ -0,0 +1,78 @@
|
||||
// Benchmarks for self-adjoint (symmetric/hermitian) matrix operations.
|
||||
//
|
||||
// Tests SYMM (selfadjointView * dense) and rank-k updates.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// C = selfadjointView<Lower>(A) * B (SYMM)
|
||||
template <typename Scalar>
|
||||
static void BM_SYMM_Left(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
A = (A + A.transpose()).eval() / Scalar(2);
|
||||
Mat B = Mat::Random(n, n);
|
||||
Mat C(n, n);
|
||||
for (auto _ : state) {
|
||||
C.noalias() = A.template selfadjointView<Lower>() * B;
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(2.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// C = B * selfadjointView<Lower>(A)
|
||||
template <typename Scalar>
|
||||
static void BM_SYMM_Right(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
A = (A + A.transpose()).eval() / Scalar(2);
|
||||
Mat B = Mat::Random(n, n);
|
||||
Mat C(n, n);
|
||||
for (auto _ : state) {
|
||||
C.noalias() = B * A.template selfadjointView<Lower>();
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(2.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// Rank-k update: C.selfadjointView<Lower>().rankUpdate(A)
|
||||
// Computes C += A * A^T
|
||||
template <typename Scalar>
|
||||
static void BM_RankUpdate(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index k = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, k);
|
||||
Mat C = Mat::Zero(n, n);
|
||||
for (auto _ : state) {
|
||||
C.template selfadjointView<Lower>().rankUpdate(A);
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(1.0 * n * n * k, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// Square matrix sizes for the SYMM benchmarks.
static void SymmSizes(::benchmark::Benchmark* b) {
  for (int n : {64, 128, 256, 512, 1024}) b->Arg(n);
}

// (n, k) pairs for the rank-k update: matrix size n, update rank k.
static void RankUpdateSizes(::benchmark::Benchmark* b) {
  for (int n : {64, 128, 256, 512}) {
    for (int k : {16, 64, 256}) {
      b->Args({n, k});
    }
  }
}

BENCHMARK(BM_SYMM_Left<float>)->Apply(SymmSizes)->Name("SYMM_Left_float");
BENCHMARK(BM_SYMM_Left<double>)->Apply(SymmSizes)->Name("SYMM_Left_double");
BENCHMARK(BM_SYMM_Right<float>)->Apply(SymmSizes)->Name("SYMM_Right_float");
BENCHMARK(BM_SYMM_Right<double>)->Apply(SymmSizes)->Name("SYMM_Right_double");
BENCHMARK(BM_RankUpdate<float>)->Apply(RankUpdateSizes)->Name("RankUpdate_float");
BENCHMARK(BM_RankUpdate<double>)->Apply(RankUpdateSizes)->Name("RankUpdate_double");
|
||||
56
benchmarks/Core/bench_triangular_product.cpp
Normal file
56
benchmarks/Core/bench_triangular_product.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
// Benchmarks for triangular-dense matrix products (TRMM).
|
||||
//
|
||||
// Tests C = triangular(A) * B for various modes (Lower/Upper) and sides (Left/Right).
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// C = triangularView<Mode>(A) * B
|
||||
template <typename Scalar, unsigned int Mode>
|
||||
static void BM_TRMM_Left(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
Mat B = Mat::Random(n, n);
|
||||
Mat C(n, n);
|
||||
for (auto _ : state) {
|
||||
C.noalias() = A.template triangularView<Mode>() * B;
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(1.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// C = B * triangularView<Mode>(A)
|
||||
template <typename Scalar, unsigned int Mode>
|
||||
static void BM_TRMM_Right(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
Mat B = Mat::Random(n, n);
|
||||
Mat C(n, n);
|
||||
for (auto _ : state) {
|
||||
C.noalias() = B * A.template triangularView<Mode>();
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(1.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// Square matrix sizes for the TRMM benchmarks.
static void TrmmSizes(::benchmark::Benchmark* b) {
  for (int n : {64, 128, 256, 512, 1024}) b->Arg(n);
}

// Left product
BENCHMARK(BM_TRMM_Left<float, Lower>)->Apply(TrmmSizes)->Name("TRMM_Left_float_Lower");
BENCHMARK(BM_TRMM_Left<float, Upper>)->Apply(TrmmSizes)->Name("TRMM_Left_float_Upper");
BENCHMARK(BM_TRMM_Left<double, Lower>)->Apply(TrmmSizes)->Name("TRMM_Left_double_Lower");
BENCHMARK(BM_TRMM_Left<double, Upper>)->Apply(TrmmSizes)->Name("TRMM_Left_double_Upper");

// Right product
BENCHMARK(BM_TRMM_Right<float, Lower>)->Apply(TrmmSizes)->Name("TRMM_Right_float_Lower");
BENCHMARK(BM_TRMM_Right<float, Upper>)->Apply(TrmmSizes)->Name("TRMM_Right_float_Upper");
BENCHMARK(BM_TRMM_Right<double, Lower>)->Apply(TrmmSizes)->Name("TRMM_Right_double_Lower");
BENCHMARK(BM_TRMM_Right<double, Upper>)->Apply(TrmmSizes)->Name("TRMM_Right_double_Upper");
|
||||
@@ -1 +1 @@
|
||||
# LU benchmarks will be added here.
|
||||
eigen_add_benchmark(bench_lu bench_lu.cpp)
|
||||
|
||||
139
benchmarks/LU/bench_lu.cpp
Normal file
139
benchmarks/LU/bench_lu.cpp
Normal file
@@ -0,0 +1,139 @@
|
||||
// Benchmarks for LU decompositions.
|
||||
//
|
||||
// Tests PartialPivLU and FullPivLU: compute, solve, inverse, determinant.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/LU>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// NOTE(review): Matf/Matd do not appear to be referenced in this file —
// candidates for removal.
typedef Matrix<float, Dynamic, Dynamic> Matf;
typedef Matrix<double, Dynamic, Dynamic> Matd;
|
||||
|
||||
// --- PartialPivLU ---
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DONT_INLINE void do_compute(PartialPivLU<Matrix<Scalar, Dynamic, Dynamic>>& lu,
|
||||
const Matrix<Scalar, Dynamic, Dynamic>& A) {
|
||||
lu.compute(A);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_PartialPivLU_Compute(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
PartialPivLU<Mat> lu(n);
|
||||
for (auto _ : state) {
|
||||
do_compute(lu, A);
|
||||
benchmark::DoNotOptimize(lu.matrixLU().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_PartialPivLU_Solve(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index nrhs = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
Mat B = Mat::Random(n, nrhs);
|
||||
PartialPivLU<Mat> lu(A);
|
||||
Mat X(n, nrhs);
|
||||
for (auto _ : state) {
|
||||
X = lu.solve(B);
|
||||
benchmark::DoNotOptimize(X.data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_PartialPivLU_Inverse(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
PartialPivLU<Mat> lu(A);
|
||||
Mat inv(n, n);
|
||||
for (auto _ : state) {
|
||||
inv = lu.inverse();
|
||||
benchmark::DoNotOptimize(inv.data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_PartialPivLU_Determinant(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
PartialPivLU<Mat> lu(A);
|
||||
for (auto _ : state) {
|
||||
Scalar d = lu.determinant();
|
||||
benchmark::DoNotOptimize(d);
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// --- FullPivLU ---
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DONT_INLINE void do_compute(FullPivLU<Matrix<Scalar, Dynamic, Dynamic>>& lu,
|
||||
const Matrix<Scalar, Dynamic, Dynamic>& A) {
|
||||
lu.compute(A);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_FullPivLU_Compute(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
FullPivLU<Mat> lu(n, n);
|
||||
for (auto _ : state) {
|
||||
do_compute(lu, A);
|
||||
benchmark::DoNotOptimize(lu.matrixLU().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_FullPivLU_Solve(benchmark::State& state) {
|
||||
const Index n = state.range(0);
|
||||
const Index nrhs = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(n, n);
|
||||
Mat B = Mat::Random(n, nrhs);
|
||||
FullPivLU<Mat> lu(A);
|
||||
Mat X(n, nrhs);
|
||||
for (auto _ : state) {
|
||||
X = lu.solve(B);
|
||||
benchmark::DoNotOptimize(X.data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// --- Size configurations ---
|
||||
|
||||
// Square matrix sizes for factorization, inverse, and determinant.
static void SquareSizes(::benchmark::Benchmark* b) {
  for (int n : {8, 32, 64, 128, 256, 512, 1024}) b->Arg(n);
}

// (n, nrhs) pairs for the solve benchmarks.
static void SolveSizes(::benchmark::Benchmark* b) {
  for (int n : {32, 128, 512, 1024}) {
    for (int nrhs : {1, 16, 64}) {
      b->Args({n, nrhs});
    }
  }
}

BENCHMARK(BM_PartialPivLU_Compute<float>)->Apply(SquareSizes)->Name("PartialPivLU_Compute_float");
BENCHMARK(BM_PartialPivLU_Compute<double>)->Apply(SquareSizes)->Name("PartialPivLU_Compute_double");
BENCHMARK(BM_PartialPivLU_Solve<float>)->Apply(SolveSizes)->Name("PartialPivLU_Solve_float");
BENCHMARK(BM_PartialPivLU_Solve<double>)->Apply(SolveSizes)->Name("PartialPivLU_Solve_double");
BENCHMARK(BM_PartialPivLU_Inverse<float>)->Apply(SquareSizes)->Name("PartialPivLU_Inverse_float");
BENCHMARK(BM_PartialPivLU_Inverse<double>)->Apply(SquareSizes)->Name("PartialPivLU_Inverse_double");
BENCHMARK(BM_PartialPivLU_Determinant<float>)->Apply(SquareSizes)->Name("PartialPivLU_Determinant_float");
BENCHMARK(BM_PartialPivLU_Determinant<double>)->Apply(SquareSizes)->Name("PartialPivLU_Determinant_double");
BENCHMARK(BM_FullPivLU_Compute<float>)->Apply(SquareSizes)->Name("FullPivLU_Compute_float");
BENCHMARK(BM_FullPivLU_Compute<double>)->Apply(SquareSizes)->Name("FullPivLU_Compute_double");
BENCHMARK(BM_FullPivLU_Solve<float>)->Apply(SolveSizes)->Name("FullPivLU_Solve_float");
BENCHMARK(BM_FullPivLU_Solve<double>)->Apply(SolveSizes)->Name("FullPivLU_Solve_double");
|
||||
@@ -1 +1 @@
|
||||
# QR benchmarks will be added here.
|
||||
eigen_add_benchmark(bench_qr bench_qr.cpp)
|
||||
|
||||
123
benchmarks/QR/bench_qr.cpp
Normal file
123
benchmarks/QR/bench_qr.cpp
Normal file
@@ -0,0 +1,123 @@
|
||||
// Benchmarks for QR decompositions.
|
||||
//
|
||||
// Tests HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR, and COD.
|
||||
// Both square and tall-thin matrix shapes are tested.
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/QR>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
template <typename QR>
|
||||
EIGEN_DONT_INLINE void do_compute(QR& qr, const typename QR::MatrixType& A) {
|
||||
qr.compute(A);
|
||||
}
|
||||
|
||||
// --- HouseholderQR ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_HouseholderQR(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
HouseholderQR<Mat> qr(rows, cols);
|
||||
for (auto _ : state) {
|
||||
do_compute(qr, A);
|
||||
benchmark::DoNotOptimize(qr.matrixQR().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// --- ColPivHouseholderQR ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_ColPivHouseholderQR(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
ColPivHouseholderQR<Mat> qr(rows, cols);
|
||||
for (auto _ : state) {
|
||||
do_compute(qr, A);
|
||||
benchmark::DoNotOptimize(qr.matrixQR().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// --- FullPivHouseholderQR ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_FullPivHouseholderQR(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
FullPivHouseholderQR<Mat> qr(rows, cols);
|
||||
for (auto _ : state) {
|
||||
do_compute(qr, A);
|
||||
benchmark::DoNotOptimize(qr.matrixQR().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// --- CompleteOrthogonalDecomposition (COD) ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_COD(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
CompleteOrthogonalDecomposition<Mat> cod(rows, cols);
|
||||
for (auto _ : state) {
|
||||
do_compute(cod, A);
|
||||
benchmark::DoNotOptimize(cod.matrixQTZ().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// --- QR solve ---
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_HouseholderQR_Solve(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
Vec b = Vec::Random(rows);
|
||||
HouseholderQR<Mat> qr(A);
|
||||
Vec x(cols);
|
||||
for (auto _ : state) {
|
||||
x = qr.solve(b);
|
||||
benchmark::DoNotOptimize(x.data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// --- Size configurations ---
|
||||
|
||||
// (rows, cols) shapes: square sizes plus tall-thin least-squares shapes.
static void QrSizes(::benchmark::Benchmark* b) {
  // Square
  for (int n : {32, 64, 128, 256, 512, 1024}) b->Args({n, n});
  // Tall-thin
  b->Args({1000, 32});
  b->Args({1000, 100});
  b->Args({10000, 32});
  b->Args({10000, 100});
}

// Register: float
BENCHMARK(BM_HouseholderQR<float>)->Apply(QrSizes)->Name("HouseholderQR_float");
BENCHMARK(BM_ColPivHouseholderQR<float>)->Apply(QrSizes)->Name("ColPivHouseholderQR_float");
BENCHMARK(BM_FullPivHouseholderQR<float>)->Apply(QrSizes)->Name("FullPivHouseholderQR_float");
BENCHMARK(BM_COD<float>)->Apply(QrSizes)->Name("COD_float");
BENCHMARK(BM_HouseholderQR_Solve<float>)->Apply(QrSizes)->Name("HouseholderQR_Solve_float");

// Register: double
BENCHMARK(BM_HouseholderQR<double>)->Apply(QrSizes)->Name("HouseholderQR_double");
BENCHMARK(BM_ColPivHouseholderQR<double>)->Apply(QrSizes)->Name("ColPivHouseholderQR_double");
BENCHMARK(BM_FullPivHouseholderQR<double>)->Apply(QrSizes)->Name("FullPivHouseholderQR_double");
BENCHMARK(BM_COD<double>)->Apply(QrSizes)->Name("COD_double");
BENCHMARK(BM_HouseholderQR_Solve<double>)->Apply(QrSizes)->Name("HouseholderQR_Solve_double");
|
||||
Reference in New Issue
Block a user