mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Migrate Eigen benchmarks to the Google benchmark framework
libeigen/eigen!2132 Closes #3025 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
60
benchmarks/CMakeLists.txt
Normal file
60
benchmarks/CMakeLists.txt
Normal file
@@ -0,0 +1,60 @@
|
||||
# Build configuration for the Eigen Google-benchmark suite.
cmake_minimum_required(VERSION 3.10)
project(EigenBenchmarks CXX)

# The Google benchmark library supplies the timing/reporting framework.
find_package(benchmark REQUIRED)

# Eigen is a header-only library; it lives one directory above this one.
set(EIGEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")
|
||||
|
||||
# Helper: add a Google Benchmark target.
# eigen_add_benchmark(name source [LIBRARIES lib1 lib2 ...] [DEFINITIONS def1 def2 ...])
#
#   name        - target/executable name
#   source      - benchmark source file
#   LIBRARIES   - additional libraries to link (optional)
#   DEFINITIONS - additional preprocessor definitions, without -D (optional)
function(eigen_add_benchmark name source)
  cmake_parse_arguments(BENCH "" "" "LIBRARIES;DEFINITIONS" ${ARGN})
  add_executable(${name} ${source})
  target_include_directories(${name} PRIVATE ${EIGEN_SOURCE_DIR})
  target_link_libraries(${name} PRIVATE benchmark::benchmark benchmark::benchmark_main)
  if(BENCH_LIBRARIES)
    target_link_libraries(${name} PRIVATE ${BENCH_LIBRARIES})
  endif()
  # NDEBUG is a preprocessor definition, not a compiler flag: route it through
  # target_compile_definitions so CMake tracks/de-duplicates it correctly
  # (previously it was smuggled in as "-DNDEBUG" via compile options).
  target_compile_definitions(${name} PRIVATE NDEBUG)
  # -O3 is a GCC/Clang-style flag; MSVC would reject it, so pick per compiler.
  if(MSVC)
    target_compile_options(${name} PRIVATE /O2)
  else()
    target_compile_options(${name} PRIVATE -O3)
  endif()
  if(BENCH_DEFINITIONS)
    target_compile_definitions(${name} PRIVATE ${BENCH_DEFINITIONS})
  endif()
endfunction()
|
||||
|
||||
# --- Dense benchmarks ---
eigen_add_benchmark(bench_cholesky benchCholesky.cpp)
eigen_add_benchmark(bench_eigensolver benchEigenSolver.cpp)
eigen_add_benchmark(bench_fft benchFFT.cpp)
eigen_add_benchmark(bench_geometry_transforms benchGeometry.cpp)
eigen_add_benchmark(bench_vecadd benchVecAdd.cpp)
eigen_add_benchmark(bench_gemm benchGemm.cpp)
eigen_add_benchmark(bench_gemm_double benchGemm.cpp DEFINITIONS SCALAR=double)
eigen_add_benchmark(bench_gemv benchGemv.cpp)
eigen_add_benchmark(bench_move_semantics bench_move_semantics.cpp)
eigen_add_benchmark(bench_reverse bench_reverse.cpp)
eigen_add_benchmark(bench_dense_solvers dense_solvers.cpp)
eigen_add_benchmark(bench_trsm bench_trsm.cpp)
eigen_add_benchmark(bench_svd bench_svd.cpp)
eigen_add_benchmark(bench_eig33 eig33.cpp)
eigen_add_benchmark(bench_geometry geometry.cpp)
eigen_add_benchmark(bench_quatmul quatmul.cpp)

# --- Sparse benchmarks ---
eigen_add_benchmark(bench_sparse_dense_product sparse_dense_product.cpp)
eigen_add_benchmark(bench_sparse_product sparse_product.cpp)
eigen_add_benchmark(bench_sparse_transpose sparse_transpose.cpp)

# --- GEMM blocking parameter sweep ---
eigen_add_benchmark(bench_blocking_sizes benchmark_blocking_sizes.cpp)

# --- AOCL benchmark (AOCL optional) ---
eigen_add_benchmark(bench_aocl benchmark_aocl.cpp)

# Optional: BLAS GEMM comparison benchmark (requires CBLAS).
find_package(BLAS QUIET)
if(BLAS_FOUND)
  eigen_add_benchmark(bench_blas_gemm benchBlasGemm.cpp
    LIBRARIES ${BLAS_LIBRARIES}
    DEFINITIONS HAVE_BLAS)
endif()
|
||||
73
benchmarks/benchBlasGemm.cpp
Normal file
73
benchmarks/benchBlasGemm.cpp
Normal file
@@ -0,0 +1,73 @@
|
||||
// Benchmark: Eigen GEMM vs CBLAS GEMM
|
||||
// Requires CBLAS: compile with -DHAVE_BLAS and link -lcblas
|
||||
//
|
||||
// Based on bench/benchBlasGemm.cpp
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SCALAR
|
||||
#define SCALAR float
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> MyMatrix;
|
||||
|
||||
// Times the accumulating product c += a * b with Eigen's GEMM,
// for an (M x K) * (K x N) problem taken from the benchmark args.
static void BM_EigenGemm(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const int depth = state.range(2);
  MyMatrix lhs = MyMatrix::Random(rows, depth);
  MyMatrix rhs = MyMatrix::Random(depth, cols);
  MyMatrix dst = MyMatrix::Random(rows, cols);
  for (auto _ : state) {
    dst.noalias() += lhs * rhs;
    benchmark::DoNotOptimize(dst.data());
  }
  // One multiply and one add per (i,j,k) triple: 2*M*N*K flops per iteration.
  const double flops = 2.0 * rows * cols * depth;
  state.counters["GFLOPS"] =
      benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
|
||||
|
||||
#ifdef HAVE_BLAS
extern "C" {
#include <cblas.h>
}

// Dispatch to the CBLAS routine that matches the configured Scalar type.
//
// Bug fix: the previous macro selection was "#ifdef _FLOAT", a macro that
// nothing in this file or the build defines, so the default build
// (SCALAR == float) fell through to cblas_dgemm and ran double-precision
// GEMM on float buffers. An inline function resolved on the Scalar type
// cannot get out of sync with SCALAR and keeps the existing
// CBLAS_GEMM(...) call sites unchanged.
inline void CBLAS_GEMM(CBLAS_ORDER order, CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int m, int n, int k,
                       Scalar alpha, const Scalar* A, int lda, const Scalar* B, int ldb, Scalar beta, Scalar* C,
                       int ldc) {
  if constexpr (sizeof(Scalar) == sizeof(float)) {
    cblas_sgemm(order, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
  } else {
    cblas_dgemm(order, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
  }
}
|
||||
|
||||
// Same accumulating GEMM, routed through the CBLAS interface for comparison.
static void BM_CblasGemm(benchmark::State& state) {
  const int rows = state.range(0);
  const int cols = state.range(1);
  const int depth = state.range(2);
  MyMatrix lhs = MyMatrix::Random(rows, depth);
  MyMatrix rhs = MyMatrix::Random(depth, cols);
  MyMatrix dst = MyMatrix::Random(rows, cols);
  const Scalar alpha = 1;
  const Scalar beta = 1;  // beta == 1 accumulates into dst, matching Eigen's "+="
  for (auto _ : state) {
    CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, rows, cols, depth, alpha, lhs.data(), rows, rhs.data(),
               depth, beta, dst.data(), rows);
    benchmark::DoNotOptimize(dst.data());
  }
  const double flops = 2.0 * rows * cols * depth;
  state.counters["GFLOPS"] =
      benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
|
||||
#endif
|
||||
|
||||
static void GemmSizes(::benchmark::Benchmark* b) {
|
||||
for (int s : {32, 64, 128, 256, 512, 1024, 2048}) {
|
||||
b->Args({s, s, s});
|
||||
}
|
||||
// Rectangular
|
||||
b->Args({1000, 100, 1000});
|
||||
b->Args({100, 1000, 100});
|
||||
}
|
||||
|
||||
BENCHMARK(BM_EigenGemm)->Apply(GemmSizes);
|
||||
#ifdef HAVE_BLAS
|
||||
BENCHMARK(BM_CblasGemm)->Apply(GemmSizes);
|
||||
#endif
|
||||
48
benchmarks/benchCholesky.cpp
Normal file
48
benchmarks/benchCholesky.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Cholesky>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
typedef float Scalar;
|
||||
|
||||
static void BM_LDLT(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> SquareMatrixType;
|
||||
MatrixType a = MatrixType::Random(n, n);
|
||||
SquareMatrixType covMat = a * a.adjoint();
|
||||
int r = internal::random<int>(0, n - 1);
|
||||
int c = internal::random<int>(0, n - 1);
|
||||
Scalar acc = 0;
|
||||
for (auto _ : state) {
|
||||
LDLT<SquareMatrixType> cholnosqrt(covMat);
|
||||
acc += cholnosqrt.matrixL().coeff(r, c);
|
||||
benchmark::DoNotOptimize(acc);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_LDLT)->RangeMultiplier(2)->Range(4, 1500);
|
||||
|
||||
static void BM_LLT(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> SquareMatrixType;
|
||||
MatrixType a = MatrixType::Random(n, n);
|
||||
SquareMatrixType covMat = a * a.adjoint();
|
||||
int r = internal::random<int>(0, n - 1);
|
||||
int c = internal::random<int>(0, n - 1);
|
||||
Scalar acc = 0;
|
||||
for (auto _ : state) {
|
||||
LLT<SquareMatrixType> chol(covMat);
|
||||
acc += chol.matrixL().coeff(r, c);
|
||||
benchmark::DoNotOptimize(acc);
|
||||
}
|
||||
double cost = 0;
|
||||
for (int j = 0; j < n; ++j) {
|
||||
int rem = std::max(n - j - 1, 0);
|
||||
cost += 2 * (rem * j + rem + j);
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(cost, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
BENCHMARK(BM_LLT)->RangeMultiplier(2)->Range(4, 1500);
|
||||
42
benchmarks/benchEigenSolver.cpp
Normal file
42
benchmarks/benchEigenSolver.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/QR>
|
||||
#include <Eigen/Eigenvalues>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
typedef float Scalar;
|
||||
|
||||
static void BM_SelfAdjointEigenSolver(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
|
||||
MatrixType a = MatrixType::Random(n, n);
|
||||
MatrixType covMat = a * a.adjoint();
|
||||
int r = internal::random<int>(0, n - 1);
|
||||
int c = internal::random<int>(0, n - 1);
|
||||
Scalar acc = 0;
|
||||
SelfAdjointEigenSolver<MatrixType> ei(covMat);
|
||||
for (auto _ : state) {
|
||||
ei.compute(covMat);
|
||||
acc += ei.eigenvectors().coeff(r, c);
|
||||
benchmark::DoNotOptimize(acc);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_SelfAdjointEigenSolver)->RangeMultiplier(2)->Range(4, 512);
|
||||
|
||||
static void BM_EigenSolver(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType;
|
||||
MatrixType a = MatrixType::Random(n, n);
|
||||
MatrixType covMat = a * a.adjoint();
|
||||
int r = internal::random<int>(0, n - 1);
|
||||
int c = internal::random<int>(0, n - 1);
|
||||
Scalar acc = 0;
|
||||
EigenSolver<MatrixType> ei(covMat);
|
||||
for (auto _ : state) {
|
||||
ei.compute(covMat);
|
||||
acc += std::norm(ei.eigenvectors().coeff(r, c));
|
||||
benchmark::DoNotOptimize(acc);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_EigenSolver)->RangeMultiplier(2)->Range(4, 512);
|
||||
43
benchmarks/benchFFT.cpp
Normal file
43
benchmarks/benchFFT.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <unsupported/Eigen/FFT>
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
template <typename T, bool Forward, bool Unscaled = false, bool HalfSpec = false>
|
||||
static void BM_FFT(benchmark::State& state) {
|
||||
typedef typename NumTraits<T>::Real ScalarType;
|
||||
typedef std::complex<ScalarType> Complex;
|
||||
int nfft = state.range(0);
|
||||
std::vector<T> inbuf(nfft);
|
||||
std::vector<Complex> outbuf(nfft);
|
||||
FFT<ScalarType> fft;
|
||||
if (Unscaled) fft.SetFlag(fft.Unscaled);
|
||||
if (HalfSpec) fft.SetFlag(fft.HalfSpectrum);
|
||||
std::fill(inbuf.begin(), inbuf.end(), T(0));
|
||||
fft.fwd(outbuf, inbuf);
|
||||
|
||||
for (auto _ : state) {
|
||||
if (Forward)
|
||||
fft.fwd(outbuf, inbuf);
|
||||
else
|
||||
fft.inv(inbuf, outbuf);
|
||||
benchmark::DoNotOptimize(outbuf.data());
|
||||
benchmark::DoNotOptimize(inbuf.data());
|
||||
}
|
||||
double mflops_per_iter = 5.0 * nfft * std::log2(static_cast<double>(nfft));
|
||||
if (!NumTraits<T>::IsComplex) mflops_per_iter /= 2;
|
||||
state.counters["MFLOPS"] =
|
||||
benchmark::Counter(mflops_per_iter, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
BENCHMARK(BM_FFT<std::complex<float>, true>)->Arg(1024)->Arg(4096);
|
||||
BENCHMARK(BM_FFT<std::complex<float>, false>)->Arg(1024)->Arg(4096);
|
||||
BENCHMARK(BM_FFT<float, true>)->Arg(1024)->Arg(4096);
|
||||
BENCHMARK(BM_FFT<float, false>)->Arg(1024)->Arg(4096);
|
||||
BENCHMARK(BM_FFT<std::complex<double>, true>)->Arg(1024)->Arg(4096);
|
||||
BENCHMARK(BM_FFT<std::complex<double>, false>)->Arg(1024)->Arg(4096);
|
||||
BENCHMARK(BM_FFT<double, true>)->Arg(1024)->Arg(4096);
|
||||
BENCHMARK(BM_FFT<double, false>)->Arg(1024)->Arg(4096);
|
||||
81
benchmarks/benchGemm.cpp
Normal file
81
benchmarks/benchGemm.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SCALAR
|
||||
#define SCALAR float
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
|
||||
|
||||
template <typename A, typename B, typename C>
|
||||
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) {
|
||||
c.noalias() += a * b;
|
||||
}
|
||||
|
||||
// Eigen GEMM benchmark: c is re-zeroed each iteration so every timed pass
// performs exactly one full m x n x p product.
static void BM_EigenGemm(benchmark::State& state) {
  const int m = state.range(0);
  const int n = state.range(1);
  const int p = state.range(2);
  Mat a(m, p);
  a.setRandom();
  Mat b(p, n);
  b.setRandom();
  Mat c = Mat::Zero(m, n);
  for (auto _ : state) {
    c.setZero();
    gemm(a, b, c);
    benchmark::DoNotOptimize(c.data());
    benchmark::ClobberMemory();
  }
  const double flops = 2.0 * m * n * p;
  state.counters["GFLOPS"] =
      benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
|
||||
|
||||
static void GemmSizes(::benchmark::Benchmark* b) {
|
||||
for (int size : {8, 16, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 384, 448, 512, 768, 1024, 1536, 2048}) {
|
||||
b->Args({size, size, size});
|
||||
}
|
||||
// Non-square sizes
|
||||
b->Args({64, 64, 1024});
|
||||
b->Args({1024, 64, 64});
|
||||
b->Args({64, 1024, 64});
|
||||
b->Args({256, 256, 1024});
|
||||
b->Args({1024, 256, 256});
|
||||
}
|
||||
|
||||
BENCHMARK(BM_EigenGemm)->Apply(GemmSizes);
|
||||
|
||||
#ifdef HAVE_BLAS
extern "C" {
#include <Eigen/src/misc/blas.h>
}

// Reference GEMM through the Fortran BLAS interface declared by Eigen.
//
// Fix: the address-of operators in the sgemm_/dgemm_ calls had been mangled
// into the U+00AC character ("¬rans" instead of "&notrans"), which does not
// compile; they are restored here. The unused local `zero` is also removed.
static void BM_BlasGemm(benchmark::State& state) {
  int m = state.range(0);
  int n = state.range(1);
  int p = state.range(2);
  Mat a(m, p);
  a.setRandom();
  Mat b(p, n);
  b.setRandom();
  Mat c = Mat::Zero(m, n);
  char notrans = 'N';
  Scalar one = 1;  // used as both alpha and beta (accumulate into zeroed c)
  for (auto _ : state) {
    c.setZero();
    if constexpr (std::is_same_v<Scalar, float>) {
      sgemm_(&notrans, &notrans, &m, &n, &p, &one, a.data(), &m, b.data(), &p, &one, c.data(), &m);
    } else {
      dgemm_(&notrans, &notrans, &m, &n, &p, &one, a.data(), &m, b.data(), &p, &one, c.data(), &m);
    }
    benchmark::DoNotOptimize(c.data());
    benchmark::ClobberMemory();
  }
  state.counters["GFLOPS"] =
      benchmark::Counter(2.0 * m * n * p, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
}
BENCHMARK(BM_BlasGemm)->Apply(GemmSizes);
#endif
|
||||
160
benchmarks/benchGemv.cpp
Normal file
160
benchmarks/benchGemv.cpp
Normal file
@@ -0,0 +1,160 @@
|
||||
// Benchmark for dense general matrix-vector multiplication (GEMV).
|
||||
//
|
||||
// Tests performance of y += op(A) * x for various matrix sizes, aspect ratios,
|
||||
// scalar types, and operation variants (transpose, conjugate, adjoint).
|
||||
//
|
||||
// The Eigen GEMV kernel (Eigen/src/Core/products/GeneralMatrixVector.h) has
|
||||
// two main specializations:
|
||||
// - ColMajor kernel: used for y += A * x with column-major A.
|
||||
// Processes vertical panels, vectorizes along rows.
|
||||
// - RowMajor kernel: used for y += A^T * x with column-major A.
|
||||
// Processes groups of rows, vectorizes the dot product along columns.
|
||||
//
|
||||
// For complex scalars, conjugation flags (ConjugateLhs, ConjugateRhs) select
|
||||
// additional code paths within each kernel via conj_helper.
|
||||
//
|
||||
// Operation mapping (for column-major stored A):
|
||||
// Gemv y += A * x -> ColMajor kernel, no conjugation
|
||||
// GemvTrans y += A^T * x -> RowMajor kernel, no conjugation
|
||||
// GemvConj y += conj(A) * x -> ColMajor kernel, ConjugateLhs=true
|
||||
// GemvAdj y += A^H * x -> RowMajor kernel, ConjugateLhs=true
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// ---------- Benchmark helpers ----------
|
||||
|
||||
// GEMV flop count: 2*m*n for real, 8*m*n for complex.
|
||||
template <typename Scalar>
|
||||
double gemvFlops(Index m, Index n) {
|
||||
return (NumTraits<Scalar>::IsComplex ? 8.0 : 2.0) * m * n;
|
||||
}
|
||||
|
||||
// ---------- y += A * x (ColMajor GEMV kernel, no conjugation) ----------
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_Gemv(benchmark::State& state) {
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
const Index m = state.range(0);
|
||||
const Index n = state.range(1);
|
||||
Mat A = Mat::Random(m, n);
|
||||
Vec x = Vec::Random(n);
|
||||
Vec y = Vec::Random(m);
|
||||
for (auto _ : state) {
|
||||
y.noalias() += A * x;
|
||||
benchmark::DoNotOptimize(y.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
state.counters["GFLOPS"] = benchmark::Counter(gemvFlops<Scalar>(m, n), benchmark::Counter::kIsIterationInvariantRate,
|
||||
benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// ---------- y += A^T * x (RowMajor GEMV kernel, no conjugation) ----------
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_GemvTrans(benchmark::State& state) {
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
const Index m = state.range(0);
|
||||
const Index n = state.range(1);
|
||||
Mat A = Mat::Random(m, n);
|
||||
Vec x = Vec::Random(m);
|
||||
Vec y = Vec::Random(n);
|
||||
for (auto _ : state) {
|
||||
y.noalias() += A.transpose() * x;
|
||||
benchmark::DoNotOptimize(y.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
state.counters["GFLOPS"] = benchmark::Counter(gemvFlops<Scalar>(m, n), benchmark::Counter::kIsIterationInvariantRate,
|
||||
benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// ---------- y += conj(A) * x (ColMajor kernel, ConjugateLhs=true) ----------
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_GemvConj(benchmark::State& state) {
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
const Index m = state.range(0);
|
||||
const Index n = state.range(1);
|
||||
Mat A = Mat::Random(m, n);
|
||||
Vec x = Vec::Random(n);
|
||||
Vec y = Vec::Random(m);
|
||||
for (auto _ : state) {
|
||||
y.noalias() += A.conjugate() * x;
|
||||
benchmark::DoNotOptimize(y.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
state.counters["GFLOPS"] = benchmark::Counter(gemvFlops<Scalar>(m, n), benchmark::Counter::kIsIterationInvariantRate,
|
||||
benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// ---------- y += A^H * x (RowMajor kernel, ConjugateLhs=true) ----------
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_GemvAdj(benchmark::State& state) {
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
const Index m = state.range(0);
|
||||
const Index n = state.range(1);
|
||||
Mat A = Mat::Random(m, n);
|
||||
Vec x = Vec::Random(m);
|
||||
Vec y = Vec::Random(n);
|
||||
for (auto _ : state) {
|
||||
y.noalias() += A.adjoint() * x;
|
||||
benchmark::DoNotOptimize(y.data());
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
state.counters["GFLOPS"] = benchmark::Counter(gemvFlops<Scalar>(m, n), benchmark::Counter::kIsIterationInvariantRate,
|
||||
benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// ---------- Size configurations ----------
|
||||
// All sizes refer to the stored matrix A (m rows, n cols).
|
||||
|
||||
static void GemvSizes(::benchmark::Benchmark* b) {
|
||||
// Square matrices: exercises balanced kernel behavior.
|
||||
for (int size : {8, 16, 32, 64, 128, 256, 512, 1024, 4096}) {
|
||||
b->Args({size, size});
|
||||
}
|
||||
// Tall-thin (m >> n): in ColMajor kernel, the inner vectorized loop over rows
|
||||
// is long while the outer column loop is short. In RowMajor kernel (transpose),
|
||||
// there are many rows to process but short dot products.
|
||||
for (int n : {1, 4, 16, 64}) {
|
||||
for (int m : {256, 1024, 4096}) {
|
||||
if (m != n) b->Args({m, n});
|
||||
}
|
||||
}
|
||||
// Short-wide (m << n): in ColMajor kernel, the outer column loop is long but
|
||||
// the inner vectorized loop over rows is short. In RowMajor kernel (transpose),
|
||||
// there are few rows but long dot products.
|
||||
for (int m : {1, 4, 16, 64}) {
|
||||
for (int n : {256, 1024, 4096}) {
|
||||
if (m != n) b->Args({m, n});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- Register benchmarks ----------
|
||||
|
||||
// Real types: Gemv and GemvTrans exercise the two kernel specializations.
|
||||
// Conjugation is a no-op for real scalars.
|
||||
|
||||
BENCHMARK(BM_Gemv<float>)->Apply(GemvSizes)->Name("Gemv_float");
|
||||
BENCHMARK(BM_Gemv<double>)->Apply(GemvSizes)->Name("Gemv_double");
|
||||
BENCHMARK(BM_GemvTrans<float>)->Apply(GemvSizes)->Name("GemvTrans_float");
|
||||
BENCHMARK(BM_GemvTrans<double>)->Apply(GemvSizes)->Name("GemvTrans_double");
|
||||
|
||||
// Complex types: all four variants exercise distinct kernel code paths.
|
||||
|
||||
BENCHMARK(BM_Gemv<std::complex<float>>)->Apply(GemvSizes)->Name("Gemv_cfloat");
|
||||
BENCHMARK(BM_Gemv<std::complex<double>>)->Apply(GemvSizes)->Name("Gemv_cdouble");
|
||||
BENCHMARK(BM_GemvTrans<std::complex<float>>)->Apply(GemvSizes)->Name("GemvTrans_cfloat");
|
||||
BENCHMARK(BM_GemvTrans<std::complex<double>>)->Apply(GemvSizes)->Name("GemvTrans_cdouble");
|
||||
BENCHMARK(BM_GemvConj<std::complex<float>>)->Apply(GemvSizes)->Name("GemvConj_cfloat");
|
||||
BENCHMARK(BM_GemvConj<std::complex<double>>)->Apply(GemvSizes)->Name("GemvConj_cdouble");
|
||||
BENCHMARK(BM_GemvAdj<std::complex<float>>)->Apply(GemvSizes)->Name("GemvAdj_cfloat");
|
||||
BENCHMARK(BM_GemvAdj<std::complex<double>>)->Apply(GemvSizes)->Name("GemvAdj_cdouble");
|
||||
43
benchmarks/benchGeometry.cpp
Normal file
43
benchmarks/benchGeometry.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Geometry>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
template <typename Scalar, int Mode, int VSize>
|
||||
static void BM_TransformVec(benchmark::State& state) {
|
||||
typedef Transform<Scalar, 3, Mode> Trans;
|
||||
typedef Matrix<Scalar, VSize, 1> Vec;
|
||||
Trans t;
|
||||
t.setIdentity();
|
||||
Vec v;
|
||||
v.setRandom();
|
||||
for (auto _ : state) {
|
||||
v = t * v;
|
||||
benchmark::DoNotOptimize(v.data());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Scalar, int Mode>
|
||||
static void BM_TransformTransform(benchmark::State& state) {
|
||||
typedef Transform<Scalar, 3, Mode> Trans;
|
||||
Trans t1, t2;
|
||||
t1.setIdentity();
|
||||
t2.setIdentity();
|
||||
for (auto _ : state) {
|
||||
t2 = Trans(t1 * t2);
|
||||
benchmark::DoNotOptimize(t2.data());
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK(BM_TransformVec<float, Isometry, 3>);
|
||||
BENCHMARK(BM_TransformVec<float, Isometry, 4>);
|
||||
BENCHMARK(BM_TransformVec<float, Projective, 4>);
|
||||
BENCHMARK(BM_TransformVec<double, Isometry, 3>);
|
||||
BENCHMARK(BM_TransformVec<double, Isometry, 4>);
|
||||
BENCHMARK(BM_TransformVec<double, Projective, 4>);
|
||||
|
||||
BENCHMARK(BM_TransformTransform<float, Isometry>);
|
||||
BENCHMARK(BM_TransformTransform<float, Projective>);
|
||||
BENCHMARK(BM_TransformTransform<double, Isometry>);
|
||||
BENCHMARK(BM_TransformTransform<double, Projective>);
|
||||
28
benchmarks/benchVecAdd.cpp
Normal file
28
benchmarks/benchVecAdd.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
static void BM_VecAdd(benchmark::State& state) {
|
||||
int size = state.range(0);
|
||||
VectorXf a = VectorXf::Random(size);
|
||||
VectorXf b = VectorXf::Random(size);
|
||||
for (auto _ : state) {
|
||||
a = a + b;
|
||||
benchmark::DoNotOptimize(a.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * size * sizeof(float) * 3);
|
||||
}
|
||||
BENCHMARK(BM_VecAdd)->RangeMultiplier(4)->Range(64, 1 << 20);
|
||||
|
||||
static void BM_MatAdd(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
MatrixXf a = MatrixXf::Random(n, n);
|
||||
MatrixXf b = MatrixXf::Random(n, n);
|
||||
for (auto _ : state) {
|
||||
a = a + b;
|
||||
benchmark::DoNotOptimize(a.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(float) * 3);
|
||||
}
|
||||
BENCHMARK(BM_MatAdd)->RangeMultiplier(2)->Range(8, 512);
|
||||
41
benchmarks/bench_move_semantics.cpp
Normal file
41
benchmarks/bench_move_semantics.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include "../test/MovableScalar.h"
|
||||
#include <utility>
|
||||
|
||||
// Round-trips m through a copy-constructed temporary followed by a
// copy-assignment back; the value of m is unchanged.
template <typename MatrixType>
void copy_matrix(MatrixType& m) {
  MatrixType duplicate(m);
  m = duplicate;
}
|
||||
|
||||
// Round-trips m through a move-constructed temporary followed by a
// move-assignment back; the value of m ends up unchanged.
template <typename MatrixType>
void move_matrix(MatrixType&& m) {
  MatrixType stolen(std::move(m));
  m = std::move(stolen);
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_CopySemantics(benchmark::State& state) {
|
||||
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
|
||||
MatrixType data = MatrixType::Random().eval();
|
||||
for (auto _ : state) {
|
||||
copy_matrix(data);
|
||||
benchmark::DoNotOptimize(data.data());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
static void BM_MoveSemantics(benchmark::State& state) {
|
||||
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
|
||||
MatrixType data = MatrixType::Random().eval();
|
||||
for (auto _ : state) {
|
||||
move_matrix(std::move(data));
|
||||
benchmark::DoNotOptimize(data.data());
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK(BM_CopySemantics<float>);
|
||||
BENCHMARK(BM_MoveSemantics<float>);
|
||||
BENCHMARK(BM_CopySemantics<double>);
|
||||
BENCHMARK(BM_MoveSemantics<double>);
|
||||
30
benchmarks/bench_reverse.cpp
Normal file
30
benchmarks/bench_reverse.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
static void BM_MatrixReverse(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
typedef Matrix<double, Dynamic, Dynamic> MatrixType;
|
||||
MatrixType a = MatrixType::Random(n, n);
|
||||
MatrixType b(n, n);
|
||||
for (auto _ : state) {
|
||||
b = a.reverse();
|
||||
benchmark::DoNotOptimize(b.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * n * sizeof(double));
|
||||
}
|
||||
BENCHMARK(BM_MatrixReverse)->RangeMultiplier(2)->Range(4, 512);
|
||||
|
||||
static void BM_VectorReverse(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
typedef Matrix<double, Dynamic, 1> VectorType;
|
||||
VectorType a = VectorType::Random(n);
|
||||
VectorType b(n);
|
||||
for (auto _ : state) {
|
||||
b = a.reverse();
|
||||
benchmark::DoNotOptimize(b.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
|
||||
}
|
||||
BENCHMARK(BM_VectorReverse)->RangeMultiplier(4)->Range(16, 1 << 18);
|
||||
103
benchmarks/bench_svd.cpp
Normal file
103
benchmarks/bench_svd.cpp
Normal file
@@ -0,0 +1,103 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Dense>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Benchmark JacobiSVD and BDCSVD for various scalar types, matrix shapes,
|
||||
// and computation options.
|
||||
|
||||
// ---------- helpers ----------
|
||||
|
||||
template <typename Scalar>
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
|
||||
template <typename SVD>
|
||||
EIGEN_DONT_INLINE void do_compute(SVD& svd, const typename SVD::MatrixType& A) {
|
||||
svd.compute(A);
|
||||
}
|
||||
|
||||
// ---------- JacobiSVD ----------
|
||||
|
||||
template <typename Scalar, int Options>
|
||||
static void BM_JacobiSVD(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
Mat<Scalar> A = Mat<Scalar>::Random(rows, cols);
|
||||
JacobiSVD<Mat<Scalar>, Options> svd(rows, cols);
|
||||
for (auto _ : state) {
|
||||
do_compute(svd, A);
|
||||
benchmark::DoNotOptimize(svd.singularValues().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// ---------- BDCSVD ----------
|
||||
|
||||
template <typename Scalar, int Options>
|
||||
static void BM_BDCSVD(benchmark::State& state) {
|
||||
const Index rows = state.range(0);
|
||||
const Index cols = state.range(1);
|
||||
Mat<Scalar> A = Mat<Scalar>::Random(rows, cols);
|
||||
BDCSVD<Mat<Scalar>, Options> svd(rows, cols);
|
||||
for (auto _ : state) {
|
||||
do_compute(svd, A);
|
||||
benchmark::DoNotOptimize(svd.singularValues().data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations());
|
||||
}
|
||||
|
||||
// ---------- Size configurations ----------
|
||||
|
||||
// Sizes suitable for JacobiSVD (O(n^2 p), expensive for large n).
|
||||
// Sizes suitable for JacobiSVD (expensive for large dimensions).
static void JacobiSizes(::benchmark::Benchmark* b) {
  // Square
  for (int side : {4, 8, 16, 32, 64, 128, 256, 512}) b->Args({side, side});
  // Tall-skinny
  b->Args({100, 4});
  b->Args({1000, 4});
  b->Args({1000, 10});
}
|
||||
|
||||
// Sizes suitable for BDCSVD (divide-and-conquer, faster for large matrices).
|
||||
// Sizes suitable for BDCSVD, which scales better on large matrices.
static void BDCSizes(::benchmark::Benchmark* b) {
  // Square
  for (int side : {4, 8, 16, 32, 64, 128, 256, 512, 1024}) b->Args({side, side});
  // Tall-skinny (triggers R-bidiagonalization when aspect ratio > 4)
  b->Args({100, 4});
  b->Args({1000, 4});
  b->Args({1000, 10});
  b->Args({1000, 100});
  b->Args({10000, 10});
  b->Args({10000, 100});
}
|
||||
|
||||
// ---------- Register benchmarks ----------
|
||||
|
||||
// JacobiSVD — float
|
||||
BENCHMARK(BM_JacobiSVD<float, ComputeThinU | ComputeThinV>)->Apply(JacobiSizes)->Name("JacobiSVD_float_ThinUV");
|
||||
BENCHMARK(BM_JacobiSVD<float, 0>)->Apply(JacobiSizes)->Name("JacobiSVD_float_ValuesOnly");
|
||||
|
||||
// JacobiSVD — double
|
||||
BENCHMARK(BM_JacobiSVD<double, ComputeThinU | ComputeThinV>)->Apply(JacobiSizes)->Name("JacobiSVD_double_ThinUV");
|
||||
BENCHMARK(BM_JacobiSVD<double, 0>)->Apply(JacobiSizes)->Name("JacobiSVD_double_ValuesOnly");
|
||||
|
||||
// BDCSVD — float
|
||||
BENCHMARK(BM_BDCSVD<float, ComputeThinU | ComputeThinV>)->Apply(BDCSizes)->Name("BDCSVD_float_ThinUV");
|
||||
BENCHMARK(BM_BDCSVD<float, 0>)->Apply(BDCSizes)->Name("BDCSVD_float_ValuesOnly");
|
||||
|
||||
// BDCSVD — double
|
||||
BENCHMARK(BM_BDCSVD<double, ComputeThinU | ComputeThinV>)->Apply(BDCSizes)->Name("BDCSVD_double_ThinUV");
|
||||
BENCHMARK(BM_BDCSVD<double, 0>)->Apply(BDCSizes)->Name("BDCSVD_double_ValuesOnly");
|
||||
|
||||
// JacobiSVD — QR preconditioner comparison (double, 64x64, ThinUV)
|
||||
BENCHMARK(BM_JacobiSVD<double, ComputeThinU | ComputeThinV | ColPivHouseholderQRPreconditioner>)
|
||||
->Args({64, 64})
|
||||
->Args({1000, 10})
|
||||
->Name("JacobiSVD_double_ColPivQR");
|
||||
BENCHMARK(BM_JacobiSVD<double, ComputeThinU | ComputeThinV | HouseholderQRPreconditioner>)
|
||||
->Args({64, 64})
|
||||
->Args({1000, 10})
|
||||
->Name("JacobiSVD_double_HouseholderQR");
|
||||
BENCHMARK(BM_JacobiSVD<double, ComputeFullU | ComputeFullV | FullPivHouseholderQRPreconditioner>)
|
||||
->Args({64, 64})
|
||||
->Name("JacobiSVD_double_FullPivQR");
|
||||
99
benchmarks/bench_trsm.cpp
Normal file
99
benchmarks/bench_trsm.cpp
Normal file
@@ -0,0 +1,99 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Dense>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// ---------- TRSV: triangular solve with single RHS vector ----------
|
||||
|
||||
template <typename Scalar, unsigned int Mode>
|
||||
static void BM_TRSV(benchmark::State& state) {
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
using Vec = Matrix<Scalar, Dynamic, 1>;
|
||||
const Index n = state.range(0);
|
||||
Mat A = Mat::Random(n, n);
|
||||
// Make diagonally dominant to ensure well-conditioned triangular part.
|
||||
A.diagonal().array() += Scalar(n);
|
||||
Vec x = Vec::Random(n);
|
||||
Vec b = x;
|
||||
for (auto _ : state) {
|
||||
x = b;
|
||||
A.template triangularView<Mode>().solveInPlace(x);
|
||||
benchmark::DoNotOptimize(x.data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations() * n * n);
|
||||
}
|
||||
|
||||
// ---------- TRSM: triangular solve with multiple RHS (OnTheLeft) ----------
|
||||
|
||||
template <typename Scalar, unsigned int Mode>
|
||||
static void BM_TRSM_Left(benchmark::State& state) {
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
const Index n = state.range(0);
|
||||
const Index nrhs = state.range(1);
|
||||
Mat A = Mat::Random(n, n);
|
||||
A.diagonal().array() += Scalar(n);
|
||||
Mat X = Mat::Random(n, nrhs);
|
||||
Mat B = X;
|
||||
for (auto _ : state) {
|
||||
X = B;
|
||||
A.template triangularView<Mode>().solveInPlace(X);
|
||||
benchmark::DoNotOptimize(X.data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations() * n * n * nrhs);
|
||||
}
|
||||
|
||||
// ---------- TRSM: triangular solve with multiple RHS (OnTheRight) ----------
|
||||
|
||||
template <typename Scalar, unsigned int Mode>
|
||||
static void BM_TRSM_Right(benchmark::State& state) {
|
||||
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
|
||||
const Index n = state.range(0);
|
||||
const Index nrhs = state.range(1);
|
||||
Mat A = Mat::Random(n, n);
|
||||
A.diagonal().array() += Scalar(n);
|
||||
Mat X = Mat::Random(nrhs, n);
|
||||
Mat B = X;
|
||||
for (auto _ : state) {
|
||||
X = B;
|
||||
A.template triangularView<Mode>().template solveInPlace<OnTheRight>(X);
|
||||
benchmark::DoNotOptimize(X.data());
|
||||
}
|
||||
state.SetItemsProcessed(state.iterations() * n * n * nrhs);
|
||||
}
|
||||
|
||||
// ---------- Size configurations ----------
|
||||
|
||||
static void TrsvSizes(::benchmark::Benchmark* b) {
|
||||
for (int n : {32, 64, 128, 256, 512, 1024}) {
|
||||
b->Args({n});
|
||||
}
|
||||
}
|
||||
|
||||
static void TrsmSizes(::benchmark::Benchmark* b) {
|
||||
for (int n : {32, 64, 128, 256, 512, 1024}) {
|
||||
for (int nrhs : {1, 4, 16, 64, 256}) {
|
||||
b->Args({n, nrhs});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- TRSV benchmarks (single RHS) ----------

BENCHMARK(BM_TRSV<float, Lower>)->Apply(TrsvSizes)->Name("TRSV_float_Lower");
BENCHMARK(BM_TRSV<float, Upper>)->Apply(TrsvSizes)->Name("TRSV_float_Upper");
BENCHMARK(BM_TRSV<double, Lower>)->Apply(TrsvSizes)->Name("TRSV_double_Lower");
BENCHMARK(BM_TRSV<double, Upper>)->Apply(TrsvSizes)->Name("TRSV_double_Upper");

// ---------- TRSM benchmarks, solving on the left (A X = B) ----------

BENCHMARK(BM_TRSM_Left<float, Lower>)->Apply(TrsmSizes)->Name("TRSM_Left_float_Lower");
BENCHMARK(BM_TRSM_Left<float, Upper>)->Apply(TrsmSizes)->Name("TRSM_Left_float_Upper");
BENCHMARK(BM_TRSM_Left<double, Lower>)->Apply(TrsmSizes)->Name("TRSM_Left_double_Lower");
BENCHMARK(BM_TRSM_Left<double, Upper>)->Apply(TrsmSizes)->Name("TRSM_Left_double_Upper");

// ---------- TRSM benchmarks, solving on the right (X A = B) ----------

BENCHMARK(BM_TRSM_Right<float, Lower>)->Apply(TrsmSizes)->Name("TRSM_Right_float_Lower");
BENCHMARK(BM_TRSM_Right<float, Upper>)->Apply(TrsmSizes)->Name("TRSM_Right_float_Upper");
BENCHMARK(BM_TRSM_Right<double, Lower>)->Apply(TrsmSizes)->Name("TRSM_Right_double_Lower");
BENCHMARK(BM_TRSM_Right<double, Upper>)->Apply(TrsmSizes)->Name("TRSM_Right_double_Upper");
|
||||
123
benchmarks/benchmark_aocl.cpp
Normal file
123
benchmarks/benchmark_aocl.cpp
Normal file
@@ -0,0 +1,123 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Dense>
|
||||
#include <Eigen/Eigenvalues>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
static void BM_VectorExp(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
|
||||
VectorXd result(n);
|
||||
for (auto _ : state) {
|
||||
result = v.array().exp();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
|
||||
}
|
||||
|
||||
static void BM_VectorSin(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
|
||||
VectorXd result(n);
|
||||
for (auto _ : state) {
|
||||
result = v.array().sin();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
|
||||
}
|
||||
|
||||
static void BM_VectorCos(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
|
||||
VectorXd result(n);
|
||||
for (auto _ : state) {
|
||||
result = v.array().cos();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
|
||||
}
|
||||
|
||||
static void BM_VectorSqrt(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
|
||||
VectorXd result(n);
|
||||
for (auto _ : state) {
|
||||
result = v.array().sqrt();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
|
||||
}
|
||||
|
||||
static void BM_VectorLog(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
|
||||
VectorXd result(n);
|
||||
for (auto _ : state) {
|
||||
result = v.array().log();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
|
||||
}
|
||||
|
||||
static void BM_VectorTanh(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
VectorXd v = VectorXd::LinSpaced(n, 0.1, 10.0);
|
||||
VectorXd result(n);
|
||||
for (auto _ : state) {
|
||||
result = v.array().tanh();
|
||||
benchmark::DoNotOptimize(result.data());
|
||||
}
|
||||
state.SetBytesProcessed(state.iterations() * n * sizeof(double));
|
||||
}
|
||||
|
||||
// Vector lengths from 1e4 to 5e6 elements.
static void VectorSizes(::benchmark::Benchmark* b) {
  for (int n : {10000, 100000, 1000000, 5000000}) {
    b->Arg(n);
  }
}

BENCHMARK(BM_VectorExp)->Apply(VectorSizes);
BENCHMARK(BM_VectorSin)->Apply(VectorSizes);
BENCHMARK(BM_VectorCos)->Apply(VectorSizes);
BENCHMARK(BM_VectorSqrt)->Apply(VectorSizes);
BENCHMARK(BM_VectorLog)->Apply(VectorSizes);
BENCHMARK(BM_VectorTanh)->Apply(VectorSizes);
|
||||
|
||||
static void BM_DGEMM(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
MatrixXd A = MatrixXd::Random(n, n);
|
||||
MatrixXd B = MatrixXd::Random(n, n);
|
||||
MatrixXd C(n, n);
|
||||
for (auto _ : state) {
|
||||
C.noalias() = A * B;
|
||||
benchmark::DoNotOptimize(C.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(2.0 * n * n * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
BENCHMARK(BM_DGEMM)->Arg(256)->Arg(512)->Arg(1024)->Arg(2048);
|
||||
|
||||
static void BM_EigenDecomposition(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
MatrixXd M = MatrixXd::Random(n, n);
|
||||
M = (M + M.transpose()) * 0.5;
|
||||
SelfAdjointEigenSolver<MatrixXd> solver;
|
||||
for (auto _ : state) {
|
||||
solver.compute(M);
|
||||
benchmark::DoNotOptimize(solver.eigenvalues().data());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_EigenDecomposition)->Arg(256)->Arg(512)->Arg(1024);
|
||||
|
||||
static void BM_FSI_Risk(benchmark::State& state) {
|
||||
int numPeriods = state.range(0);
|
||||
int numAssets = state.range(1);
|
||||
MatrixXd returns = MatrixXd::Random(numPeriods, numAssets);
|
||||
for (auto _ : state) {
|
||||
MatrixXd cov = (returns.transpose() * returns) / (numPeriods - 1);
|
||||
SelfAdjointEigenSolver<MatrixXd> solver(cov);
|
||||
benchmark::DoNotOptimize(solver.eigenvalues().data());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_FSI_Risk)->Args({10000, 500});
|
||||
76
benchmarks/benchmark_blocking_sizes.cpp
Normal file
76
benchmarks/benchmark_blocking_sizes.cpp
Normal file
@@ -0,0 +1,76 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Runtime knobs wired into Eigen's GEMM blocking logic via the
// EIGEN_TEST_SPECIFIC_BLOCKING_SIZE* hooks. They must be declared, and the
// macros defined, BEFORE Eigen/Core is included so that Eigen's product
// kernels pick them up.
bool eigen_use_specific_block_size;
int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n;
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
#include <Eigen/Core>

using namespace Eigen;

// Single-precision matrices throughout this translation unit.
typedef MatrixXf MatrixType;
typedef MatrixType::Scalar Scalar;
|
||||
|
||||
static void BM_GemmDefaultBlocking(benchmark::State& state) {
|
||||
int k = state.range(0);
|
||||
int m = state.range(1);
|
||||
int n = state.range(2);
|
||||
eigen_use_specific_block_size = false;
|
||||
MatrixType lhs = MatrixType::Random(m, k);
|
||||
MatrixType rhs = MatrixType::Random(k, n);
|
||||
MatrixType dst = MatrixType::Zero(m, n);
|
||||
for (auto _ : state) {
|
||||
dst.noalias() = lhs * rhs;
|
||||
benchmark::DoNotOptimize(dst.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(2.0 * k * m * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
static void BM_GemmCustomBlocking(benchmark::State& state) {
|
||||
int k = state.range(0);
|
||||
int m = state.range(1);
|
||||
int n = state.range(2);
|
||||
int bk = state.range(3);
|
||||
int bm = state.range(4);
|
||||
int bn = state.range(5);
|
||||
eigen_use_specific_block_size = true;
|
||||
eigen_block_size_k = bk;
|
||||
eigen_block_size_m = bm;
|
||||
eigen_block_size_n = bn;
|
||||
MatrixType lhs = MatrixType::Random(m, k);
|
||||
MatrixType rhs = MatrixType::Random(k, n);
|
||||
MatrixType dst = MatrixType::Zero(m, n);
|
||||
for (auto _ : state) {
|
||||
dst.noalias() = lhs * rhs;
|
||||
benchmark::DoNotOptimize(dst.data());
|
||||
}
|
||||
state.counters["GFLOPS"] =
|
||||
benchmark::Counter(2.0 * k * m * n, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
||||
}
|
||||
|
||||
// Square product sizes benchmarked with Eigen's default blocking.
static void DefaultBlockingSizes(::benchmark::Benchmark* b) {
  for (int s : {64, 128, 256, 512, 1024, 2048}) {
    b->Args({s, s, s});
  }
}

// Sweep block_k and block_m for a few square products; block_n is fixed to
// the full width. Args layout: {k, m, n, block_k, block_m, block_n}.
static void CustomBlockingSizes(::benchmark::Benchmark* b) {
  // Test a few product sizes with varying block sizes
  for (int s : {256, 512, 1024}) {
    for (int bk : {16, 32, 64, 128, 256}) {
      if (bk > s) continue;  // block cannot exceed the product dimension
      for (int bm : {16, 32, 64, 128, 256}) {
        if (bm > s) continue;
        b->Args({s, s, s, bk, bm, s});
      }
    }
  }
}

BENCHMARK(BM_GemmDefaultBlocking)->Apply(DefaultBlockingSizes);
BENCHMARK(BM_GemmCustomBlocking)->Apply(CustomBlockingSizes);
|
||||
167
benchmarks/dense_solvers.cpp
Normal file
167
benchmarks/dense_solvers.cpp
Normal file
@@ -0,0 +1,167 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Dense>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
typedef float Scalar;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
|
||||
|
||||
template <typename Solver>
|
||||
EIGEN_DONT_INLINE void compute_norm_equation(Solver& solver, const Mat& A) {
|
||||
if (A.rows() != A.cols())
|
||||
solver.compute(A.transpose() * A);
|
||||
else
|
||||
solver.compute(A);
|
||||
}
|
||||
|
||||
template <typename Solver>
|
||||
EIGEN_DONT_INLINE void compute(Solver& solver, const Mat& A) {
|
||||
solver.compute(A);
|
||||
}
|
||||
|
||||
static void BM_LLT(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
int size = cols;
|
||||
Mat A(rows, cols);
|
||||
A.setRandom();
|
||||
if (rows == cols) A = A * A.adjoint();
|
||||
LLT<Mat> solver(size);
|
||||
for (auto _ : state) {
|
||||
compute_norm_equation(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixLLT().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_LDLT(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
int size = cols;
|
||||
Mat A(rows, cols);
|
||||
A.setRandom();
|
||||
if (rows == cols) A = A * A.adjoint();
|
||||
LDLT<Mat> solver(size);
|
||||
for (auto _ : state) {
|
||||
compute_norm_equation(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixLDLT().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_PartialPivLU(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
int size = cols;
|
||||
Mat A(rows, cols);
|
||||
A.setRandom();
|
||||
if (rows == cols) A = A * A.adjoint();
|
||||
PartialPivLU<Mat> solver(size);
|
||||
for (auto _ : state) {
|
||||
compute_norm_equation(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixLU().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_FullPivLU(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
int size = cols;
|
||||
Mat A(rows, cols);
|
||||
A.setRandom();
|
||||
if (rows == cols) A = A * A.adjoint();
|
||||
FullPivLU<Mat> solver(size, size);
|
||||
for (auto _ : state) {
|
||||
compute_norm_equation(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixLU().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_HouseholderQR(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
HouseholderQR<Mat> solver(rows, cols);
|
||||
for (auto _ : state) {
|
||||
compute(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixQR().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_ColPivHouseholderQR(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
ColPivHouseholderQR<Mat> solver(rows, cols);
|
||||
for (auto _ : state) {
|
||||
compute(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixQR().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_COD(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
CompleteOrthogonalDecomposition<Mat> solver(rows, cols);
|
||||
for (auto _ : state) {
|
||||
compute(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixQTZ().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_FullPivHouseholderQR(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
FullPivHouseholderQR<Mat> solver(rows, cols);
|
||||
for (auto _ : state) {
|
||||
compute(solver, A);
|
||||
benchmark::DoNotOptimize(solver.matrixQR().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_JacobiSVD(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
JacobiSVD<Mat, ComputeThinU | ComputeThinV> solver(rows, cols);
|
||||
for (auto _ : state) {
|
||||
solver.compute(A);
|
||||
benchmark::DoNotOptimize(solver.singularValues().data());
|
||||
}
|
||||
}
|
||||
|
||||
static void BM_BDCSVD(benchmark::State& state) {
|
||||
int rows = state.range(0);
|
||||
int cols = state.range(1);
|
||||
Mat A = Mat::Random(rows, cols);
|
||||
BDCSVD<Mat, ComputeThinU | ComputeThinV> solver(rows, cols);
|
||||
for (auto _ : state) {
|
||||
solver.compute(A);
|
||||
benchmark::DoNotOptimize(solver.singularValues().data());
|
||||
}
|
||||
}
|
||||
|
||||
// Common (rows, cols) grid for the dense solver benchmarks.
static void DenseSolverSizes(::benchmark::Benchmark* b) {
  // Square sizes
  for (int s : {8, 100, 1000}) {
    b->Args({s, s});
  }
  // Tall-skinny sizes
  b->Args({10000, 8});
  b->Args({10000, 100});
}

BENCHMARK(BM_LLT)->Apply(DenseSolverSizes);
BENCHMARK(BM_LDLT)->Apply(DenseSolverSizes);
BENCHMARK(BM_PartialPivLU)->Apply(DenseSolverSizes);
BENCHMARK(BM_FullPivLU)->Apply(DenseSolverSizes);
BENCHMARK(BM_HouseholderQR)->Apply(DenseSolverSizes);
BENCHMARK(BM_ColPivHouseholderQR)->Apply(DenseSolverSizes);
BENCHMARK(BM_COD)->Apply(DenseSolverSizes);
BENCHMARK(BM_FullPivHouseholderQR)->Apply(DenseSolverSizes);
BENCHMARK(BM_JacobiSVD)->Apply([](::benchmark::Benchmark* b) {
  // JacobiSVD is very slow for large matrices, so it gets a reduced grid.
  for (int s : {8, 100}) b->Args({s, s});
  b->Args({10000, 8});
});
BENCHMARK(BM_BDCSVD)->Apply(DenseSolverSizes);
|
||||
97
benchmarks/eig33.cpp
Normal file
97
benchmarks/eig33.cpp
Normal file
@@ -0,0 +1,97 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Eigenvalues>
|
||||
#include <Eigen/Geometry>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Compute the three real roots of the characteristic polynomial of the
// symmetric 3x3 matrix m (its eigenvalues) with the trigonometric form of
// Cardano's closed-form cubic solution. Roots come out in non-decreasing
// order: roots(0) <= roots(1) <= roots(2).
template <typename Matrix, typename Roots>
inline void computeRoots(const Matrix& m, Roots& roots) {
  typedef typename Matrix::Scalar Scalar;
  const Scalar s_inv3 = 1.0 / 3.0;
  const Scalar s_sqrt3 = std::sqrt(Scalar(3.0));
  // Characteristic polynomial coefficients: x^3 - c2 x^2 + c1 x - c0.
  // c0 = det(m), c1 = sum of 2x2 principal minors, c2 = trace(m).
  Scalar c0 = m(0, 0) * m(1, 1) * m(2, 2) + Scalar(2) * m(0, 1) * m(0, 2) * m(1, 2) - m(0, 0) * m(1, 2) * m(1, 2) -
              m(1, 1) * m(0, 2) * m(0, 2) - m(2, 2) * m(0, 1) * m(0, 1);
  Scalar c1 = m(0, 0) * m(1, 1) - m(0, 1) * m(0, 1) + m(0, 0) * m(2, 2) - m(0, 2) * m(0, 2) + m(1, 1) * m(2, 2) -
              m(1, 2) * m(1, 2);
  Scalar c2 = m(0, 0) + m(1, 1) + m(2, 2);
  // Depress the cubic (substitute x = t + c2/3).
  Scalar c2_over_3 = c2 * s_inv3;
  Scalar a_over_3 = (c1 - c2 * c2_over_3) * s_inv3;
  // A symmetric matrix has real roots, so a_over_3 <= 0 up to rounding;
  // clamp to guard the sqrt below against round-off.
  if (a_over_3 > Scalar(0)) a_over_3 = Scalar(0);
  Scalar half_b = Scalar(0.5) * (c0 + c2_over_3 * (Scalar(2) * c2_over_3 * c2_over_3 - c1));
  // Discriminant-like quantity; clamp to <= 0 for the same reason.
  Scalar q = half_b * half_b + a_over_3 * a_over_3 * a_over_3;
  if (q > Scalar(0)) q = Scalar(0);
  // Trigonometric solution: theta lies in [0, pi/3] because atan2 returns
  // a value in [0, pi] here (sqrt(-q) >= 0).
  Scalar rho = std::sqrt(-a_over_3);
  Scalar theta = std::atan2(std::sqrt(-q), half_b) * s_inv3;
  Scalar cos_theta = std::cos(theta);
  Scalar sin_theta = std::sin(theta);
  roots(2) = c2_over_3 + Scalar(2) * rho * cos_theta;
  roots(0) = c2_over_3 - rho * (cos_theta + s_sqrt3 * sin_theta);
  roots(1) = c2_over_3 - rho * (cos_theta - s_sqrt3 * sin_theta);
}

// Hand-rolled closed-form 3x3 symmetric eigensolver used as a baseline
// against Eigen's SelfAdjointEigenSolver. Writes eigenvalues to `evals`
// (ascending) and unit eigenvectors to the columns of `evecs`.
template <typename Matrix, typename Vector>
void eigen33(const Matrix& mat, Matrix& evecs, Vector& evals) {
  typedef typename Matrix::Scalar Scalar;
  // Shift by trace/3 and scale by the largest coefficient to improve
  // conditioning of the closed-form root computation.
  Scalar shift = mat.trace() / 3;
  Matrix scaledMat = mat;
  scaledMat.diagonal().array() -= shift;
  Scalar scale = scaledMat.cwiseAbs().maxCoeff();
  scale = std::max(scale, Scalar(1));  // avoid dividing by a tiny scale
  scaledMat /= scale;
  computeRoots(scaledMat, evals);
  if ((evals(2) - evals(0)) <= Eigen::NumTraits<Scalar>::epsilon()) {
    // All eigenvalues (numerically) equal: any orthonormal basis works.
    evecs.setIdentity();
  } else {
    // Eigenvector for the largest eigenvalue: the kernel of
    // (scaledMat - evals(2) I), obtained as a cross product of two rows.
    Matrix tmp;
    tmp = scaledMat;
    tmp.diagonal().array() -= evals(2);
    evecs.col(2) = tmp.row(0).cross(tmp.row(1)).normalized();
    tmp = scaledMat;
    tmp.diagonal().array() -= evals(1);
    evecs.col(1) = tmp.row(0).cross(tmp.row(1));
    Scalar n1 = evecs.col(1).norm();
    // If the cross product degenerated (repeated eigenvalue), fall back to
    // any direction orthogonal to evecs.col(2).
    if (n1 <= Eigen::NumTraits<Scalar>::epsilon())
      evecs.col(1) = evecs.col(2).unitOrthogonal();
    else
      evecs.col(1) /= n1;
    // Re-orthogonalize col(1) against col(2), then complete the
    // right-handed orthonormal basis.
    evecs.col(1) = evecs.col(2).cross(evecs.col(1).cross(evecs.col(2))).normalized();
    evecs.col(0) = evecs.col(2).cross(evecs.col(1));
  }
  // Undo the scale/shift applied above.
  evals *= scale;
  evals.array() += shift;
}
|
||||
|
||||
static void BM_Eig33_Iterative(benchmark::State& state) {
|
||||
Matrix3d A = Matrix3d::Random();
|
||||
A = A.adjoint() * A;
|
||||
SelfAdjointEigenSolver<Matrix3d> eig(A);
|
||||
for (auto _ : state) {
|
||||
eig.compute(A);
|
||||
benchmark::DoNotOptimize(eig.eigenvalues().data());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_Eig33_Iterative);
|
||||
|
||||
static void BM_Eig33_Direct(benchmark::State& state) {
|
||||
Matrix3d A = Matrix3d::Random();
|
||||
A = A.adjoint() * A;
|
||||
SelfAdjointEigenSolver<Matrix3d> eig(A);
|
||||
for (auto _ : state) {
|
||||
eig.computeDirect(A);
|
||||
benchmark::DoNotOptimize(eig.eigenvalues().data());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_Eig33_Direct);
|
||||
|
||||
static void BM_Eig33_Custom(benchmark::State& state) {
|
||||
Matrix3d A = Matrix3d::Random();
|
||||
A = A.adjoint() * A;
|
||||
Matrix3d evecs;
|
||||
Vector3d evals;
|
||||
for (auto _ : state) {
|
||||
eigen33(A, evecs, evals);
|
||||
benchmark::DoNotOptimize(evals.data());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_Eig33_Custom);
|
||||
71
benchmarks/geometry.cpp
Normal file
71
benchmarks/geometry.cpp
Normal file
@@ -0,0 +1,71 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Geometry>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Helper to get the spatial dimension from the transformation type:
// Transform<...> exposes T::Dim directly; the specialization below handles
// plain square matrices (where the dimension is the row count).
template <typename T>
struct TransformDim {
  static constexpr int value = T::Dim;
};

template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
struct TransformDim<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> {
  static constexpr int value = Rows;
};

// Apply a transformation to a Dim x N block of points in one shot.
// N is the number of points transformed per benchmark iteration.
template <typename Transformation, int N>
static void BM_TransformData(benchmark::State& state) {
  typedef typename Transformation::Scalar Scalar;
  constexpr int Dim = TransformDim<Transformation>::value;
  Transformation t;
  if constexpr (std::is_same_v<Transformation, Matrix<Scalar, Dim, Dim>>) {
    // Plain linear map: a random Dim x Dim matrix.
    t.setRandom();
  } else {
    // Eigen Transform types: initialize from a random Dim x (Dim+1)
    // affine matrix (rotation/scale plus translation column).
    Matrix<Scalar, Dim, Dim + 1> mat;
    mat.setRandom();
    t = Transformation(mat);
  }
  Matrix<Scalar, Dim, N> data;
  data.setRandom();
  for (auto _ : state) {
    // Feed the output back in so the optimizer cannot hoist the product.
    data = t * data;
    benchmark::DoNotOptimize(data.data());
  }
}
|
||||
|
||||
// For quaternion: apply per-column
|
||||
template <typename Scalar, int N>
|
||||
static void BM_QuatTransform(benchmark::State& state) {
|
||||
Quaternion<Scalar> q;
|
||||
q.setIdentity();
|
||||
Matrix<Scalar, 3, N> data;
|
||||
data.setRandom();
|
||||
for (auto _ : state) {
|
||||
for (int i = 0; i < N; ++i) data.col(i) = q * data.col(i);
|
||||
benchmark::DoNotOptimize(data.data());
|
||||
}
|
||||
}
|
||||
|
||||
// Use typedefs to avoid commas inside the BENCHMARK macro arguments.
typedef Transform<float, 3, Isometry> Isometry3f_t;
typedef Transform<float, 3, Affine> Affine3f_t;
typedef Transform<float, 3, AffineCompact> AffineCompact3f_t;
typedef Matrix<float, 3, 3> Matrix3f_t;

// Each transformation type is benchmarked on 1, 4, and 8 points at a time.
BENCHMARK(BM_TransformData<Isometry3f_t, 1>);
BENCHMARK(BM_TransformData<Isometry3f_t, 4>);
BENCHMARK(BM_TransformData<Isometry3f_t, 8>);
BENCHMARK(BM_TransformData<Affine3f_t, 1>);
BENCHMARK(BM_TransformData<Affine3f_t, 4>);
BENCHMARK(BM_TransformData<Affine3f_t, 8>);
BENCHMARK(BM_TransformData<AffineCompact3f_t, 1>);
BENCHMARK(BM_TransformData<AffineCompact3f_t, 4>);
BENCHMARK(BM_TransformData<AffineCompact3f_t, 8>);
BENCHMARK(BM_TransformData<Matrix3f_t, 1>);
BENCHMARK(BM_TransformData<Matrix3f_t, 4>);
BENCHMARK(BM_TransformData<Matrix3f_t, 8>);

BENCHMARK(BM_QuatTransform<float, 1>);
BENCHMARK(BM_QuatTransform<float, 4>);
BENCHMARK(BM_QuatTransform<float, 8>);
|
||||
51
benchmarks/print_blocking.cpp
Normal file
51
benchmarks/print_blocking.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
#include <Eigen/Core>
|
||||
#include <cstdio>
|
||||
|
||||
using namespace Eigen;
|
||||
using namespace Eigen::internal;
|
||||
|
||||
int main() {
|
||||
printf("%-8s %-8s %-8s | %-8s %-8s %-8s | %-8s %-8s %-8s | %-8s %-8s %-8s\n", "m", "n", "k", "kc_f", "mc_f", "nc_f",
|
||||
"kc_d", "mc_d", "nc_d", "mr_f", "nr_f", "LhsPr_f");
|
||||
|
||||
// Print gebp_traits info
|
||||
{
|
||||
using Traits = gebp_traits<float, float>;
|
||||
printf("Float traits: mr=%d, nr=%d, LhsProgress=%d, RhsProgress=%d, NumberOfRegisters=%d\n", Traits::mr, Traits::nr,
|
||||
Traits::LhsProgress, Traits::RhsProgress, Traits::NumberOfRegisters);
|
||||
}
|
||||
{
|
||||
using Traits = gebp_traits<double, double>;
|
||||
printf("Double traits: mr=%d, nr=%d, LhsProgress=%d, RhsProgress=%d, NumberOfRegisters=%d\n", Traits::mr,
|
||||
Traits::nr, Traits::LhsProgress, Traits::RhsProgress, Traits::NumberOfRegisters);
|
||||
}
|
||||
|
||||
// Print cache sizes
|
||||
std::ptrdiff_t l1, l2, l3;
|
||||
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
||||
printf("Cache sizes: L1=%ld, L2=%ld, L3=%ld\n", (long)l1, (long)l2, (long)l3);
|
||||
|
||||
for (int size : {8, 16, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 384, 448, 512, 768, 1024, 1536, 2048}) {
|
||||
{
|
||||
Index kf = size, mf = size, nf = size;
|
||||
computeProductBlockingSizes<float, float>(kf, mf, nf);
|
||||
Index kd = size, md = size, nd = size;
|
||||
computeProductBlockingSizes<double, double>(kd, md, nd);
|
||||
printf("%-8d %-8d %-8d | %-8ld %-8ld %-8ld | %-8ld %-8ld %-8ld\n", size, size, size, (long)kf, (long)mf, (long)nf,
|
||||
(long)kd, (long)md, (long)nd);
|
||||
}
|
||||
}
|
||||
|
||||
// Non-square
|
||||
for (auto [m, n, k] :
|
||||
std::initializer_list<std::tuple<int, int, int>>{{64, 64, 1024}, {1024, 64, 64}, {64, 1024, 64}}) {
|
||||
Index kf = k, mf = m, nf = n;
|
||||
computeProductBlockingSizes<float, float>(kf, mf, nf);
|
||||
Index kd = k, md = m, nd = n;
|
||||
computeProductBlockingSizes<double, double>(kd, md, nd);
|
||||
printf("%-8d %-8d %-8d | %-8ld %-8ld %-8ld | %-8ld %-8ld %-8ld\n", m, n, k, (long)kf, (long)mf, (long)nf, (long)kd,
|
||||
(long)md, (long)nd);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
38
benchmarks/quatmul.cpp
Normal file
38
benchmarks/quatmul.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Geometry>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Quaternion product through Eigen's default (possibly SIMD) path.
// Out-of-line so the benchmark times a real call.
template <typename Quat>
EIGEN_DONT_INLINE void quatmul_default(const Quat& a, const Quat& b, Quat& c) {
  c = a * b;
}

// Quaternion product dispatched through internal::quat_product with
// architecture tag 0 — presumably the generic, non-vectorized kernel.
// NOTE(review): quat_product is Eigen-internal API; verify its template
// signature against the Eigen version in use.
template <typename Quat>
EIGEN_DONT_INLINE void quatmul_novec(const Quat& a, const Quat& b, Quat& c) {
  c = internal::quat_product<0, Quat, Quat, typename Quat::Scalar>::run(a, b);
}
|
||||
|
||||
template <typename Quat>
|
||||
static void BM_QuatMul_Default(benchmark::State& state) {
|
||||
Quat a(4, 1, 2, 3), b(2, 3, 4, 5), c;
|
||||
for (auto _ : state) {
|
||||
quatmul_default(a, b, c);
|
||||
benchmark::DoNotOptimize(c.coeffs().data());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Quat>
|
||||
static void BM_QuatMul_NoVec(benchmark::State& state) {
|
||||
Quat a(4, 1, 2, 3), b(2, 3, 4, 5), c;
|
||||
for (auto _ : state) {
|
||||
quatmul_novec(a, b, c);
|
||||
benchmark::DoNotOptimize(c.coeffs().data());
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK(BM_QuatMul_Default<Quaternionf>);
|
||||
BENCHMARK(BM_QuatMul_NoVec<Quaternionf>);
|
||||
BENCHMARK(BM_QuatMul_Default<Quaterniond>);
|
||||
BENCHMARK(BM_QuatMul_NoVec<Quaterniond>);
|
||||
64
benchmarks/sparse_dense_product.cpp
Normal file
64
benchmarks/sparse_dense_product.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#include <benchmark/benchmark.h>

#include <set>

#include <Eigen/Sparse>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
typedef double Scalar;
|
||||
typedef SparseMatrix<Scalar> SpMat;
|
||||
typedef Matrix<Scalar, Dynamic, 1> DenseVec;
|
||||
|
||||
// Fill `dst` with exactly `nnzPerCol` random nonzeros per column, at
// distinct random row positions, then compress the storage.
static void fillMatrix(int nnzPerCol, int rows, int cols, SpMat& dst) {
  dst.resize(rows, cols);
  // Pre-reserve per-column capacity so insert() does not reallocate.
  dst.reserve(VectorXi::Constant(cols, nnzPerCol));
  for (int j = 0; j < cols; ++j) {
    std::set<int> used;  // rows already occupied in this column
    for (int i = 0; i < nnzPerCol; ++i) {
      int row;
      // Rejection-sample until an unused row is found; cheap as long as
      // nnzPerCol << rows.
      do {
        row = internal::random<int>(0, rows - 1);
      } while (used.count(row));
      used.insert(row);
      dst.insert(row, j) = internal::random<Scalar>();
    }
  }
  dst.makeCompressed();
}
|
||||
|
||||
static void BM_SpMV(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
int nnzPerCol = state.range(1);
|
||||
SpMat sm(n, n);
|
||||
fillMatrix(nnzPerCol, n, n, sm);
|
||||
DenseVec v = DenseVec::Random(n);
|
||||
DenseVec res(n);
|
||||
for (auto _ : state) {
|
||||
res.noalias() = sm * v;
|
||||
benchmark::DoNotOptimize(res.data());
|
||||
}
|
||||
state.counters["nnz"] = sm.nonZeros();
|
||||
}
|
||||
|
||||
static void BM_SpMV_Transpose(benchmark::State& state) {
|
||||
int n = state.range(0);
|
||||
int nnzPerCol = state.range(1);
|
||||
SpMat sm(n, n);
|
||||
fillMatrix(nnzPerCol, n, n, sm);
|
||||
DenseVec v = DenseVec::Random(n);
|
||||
DenseVec res(n);
|
||||
for (auto _ : state) {
|
||||
res.noalias() = sm.transpose() * v;
|
||||
benchmark::DoNotOptimize(res.data());
|
||||
}
|
||||
state.counters["nnz"] = sm.nonZeros();
|
||||
}
|
||||
|
||||
// Args layout: {matrix dimension, nonzeros per column}.
static void SpMVSizes(::benchmark::Benchmark* b) {
  for (int n : {1000, 10000, 100000}) {
    for (int nnz : {7, 20, 50}) {
      b->Args({n, nnz});
    }
  }
}

BENCHMARK(BM_SpMV)->Apply(SpMVSizes);
BENCHMARK(BM_SpMV_Transpose)->Apply(SpMVSizes);
|
||||
49
benchmarks/sparse_product.cpp
Normal file
49
benchmarks/sparse_product.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Sparse>
|
||||
#include <set>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
typedef double Scalar;
|
||||
typedef SparseMatrix<Scalar> SpMat;
|
||||
|
||||
// Fill `dst` with exactly `nnzPerCol` random nonzeros per column, at
// distinct random row positions, then compress the storage.
static void fillMatrix(int nnzPerCol, int rows, int cols, SpMat& dst) {
  dst.resize(rows, cols);
  // Pre-reserve per-column capacity so insert() does not reallocate.
  dst.reserve(VectorXi::Constant(cols, nnzPerCol));
  for (int j = 0; j < cols; ++j) {
    std::set<int> used;  // rows already occupied in this column
    for (int i = 0; i < nnzPerCol; ++i) {
      int row;
      // Rejection-sample until an unused row is found.
      do {
        row = internal::random<int>(0, rows - 1);
      } while (used.count(row));
      used.insert(row);
      dst.insert(row, j) = internal::random<Scalar>();
    }
  }
  dst.makeCompressed();
}
|
||||
|
||||
// Sparse * sparse matrix product.
static void BM_SparseMM(benchmark::State& state) {
  const int dim = state.range(0);
  const int nnzPerCol = state.range(1);
  SpMat lhs(dim, dim), rhs(dim, dim), prod(dim, dim);
  fillMatrix(nnzPerCol, dim, dim, lhs);
  fillMatrix(nnzPerCol, dim, dim, rhs);
  for (auto _ : state) {
    prod = lhs * rhs;
    benchmark::DoNotOptimize(prod.valuePtr());
  }
  state.counters["nnz_A"] = lhs.nonZeros();
  state.counters["nnz_B"] = rhs.nonZeros();
}
|
||||
|
||||
// Args layout: {matrix dimension, nonzeros per column in each operand}.
static void SpMMSizes(::benchmark::Benchmark* b) {
  for (int n : {1000, 10000}) {
    for (int nnz : {4, 6, 10}) {
      b->Args({n, nnz});
    }
  }
}

BENCHMARK(BM_SparseMM)->Apply(SpMMSizes);
|
||||
45
benchmarks/sparse_transpose.cpp
Normal file
45
benchmarks/sparse_transpose.cpp
Normal file
@@ -0,0 +1,45 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <Eigen/Sparse>
|
||||
#include <set>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
typedef double Scalar;
|
||||
typedef SparseMatrix<Scalar> SpMat;
|
||||
|
||||
// Fill `dst` by sampling each entry independently with probability
// `density`, then compress the storage. The actual nonzero count is
// therefore random with mean rows*cols*density.
static void fillMatrix(float density, int rows, int cols, SpMat& dst) {
  dst.resize(rows, cols);
  // Reserve the expected total nonzero count up front.
  dst.reserve(static_cast<int>(rows * cols * density));
  for (int j = 0; j < cols; ++j) {
    for (int i = 0; i < rows; ++i) {
      if (internal::random<float>(0, 1) < density) {
        dst.insert(i, j) = internal::random<Scalar>();
      }
    }
  }
  dst.makeCompressed();
}
|
||||
|
||||
// Evaluate the transpose of a sparse matrix into a fresh sparse matrix.
static void BM_SparseTranspose(benchmark::State& state) {
  const int dim = state.range(0);
  const float density = state.range(1) / 10000.0f;  // arg is density * 10000
  SpMat src(dim, dim), dst(dim, dim);
  fillMatrix(density, dim, dim, src);
  for (auto _ : state) {
    dst = src.transpose();
    benchmark::DoNotOptimize(dst.valuePtr());
  }
  state.counters["nnz"] = src.nonZeros();
  state.counters["density%"] = density * 100;
}
|
||||
|
||||
// Args layout: {size, density * 10000} — the second argument is scaled so
// sub-percent densities can be expressed as integers.
static void TransposeSizes(::benchmark::Benchmark* b) {
  for (int n : {1000, 10000}) {
    for (int d : {100, 50, 10, 4}) {  // 1%, 0.5%, 0.1%, 0.04%
      b->Args({n, d});
    }
  }
}

BENCHMARK(BM_SparseTranspose)->Apply(TransposeSizes);
|
||||
Reference in New Issue
Block a user