From 8190c82cb4a2701995841407932b050213e68787 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com> Date: Wed, 11 Mar 2026 23:20:11 -0700 Subject: [PATCH] Add missing SIMD math function benchmarks libeigen/eigen!2284 Co-authored-by: Rasmus Munk Larsen --- benchmarks/Core/bench_cwise_math.cpp | 91 ++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/benchmarks/Core/bench_cwise_math.cpp b/benchmarks/Core/bench_cwise_math.cpp index 6c2da094e..ae0f12d98 100644 --- a/benchmarks/Core/bench_cwise_math.cpp +++ b/benchmarks/Core/bench_cwise_math.cpp @@ -35,15 +35,22 @@ BENCH_CWISE_UNARY(Log1p, a.log1p(), -0.5, 100) BENCH_CWISE_UNARY(Sqrt, a.sqrt(), 0, 100) BENCH_CWISE_UNARY(Rsqrt, a.rsqrt(), 0.01, 100) +BENCH_CWISE_UNARY(Log2, a.log2(), 0.01, 100) +BENCH_CWISE_UNARY(Exp2, a.exp2(), -10, 10) +BENCH_CWISE_UNARY(Expm1, a.expm1(), -2, 2) +BENCH_CWISE_UNARY(Cbrt, a.cbrt(), -100, 100) + // Trigonometric functions BENCH_CWISE_UNARY(Sin, a.sin(), -3.14, 3.14) BENCH_CWISE_UNARY(Cos, a.cos(), -3.14, 3.14) BENCH_CWISE_UNARY(Tan, a.tan(), -1.5, 1.5) BENCH_CWISE_UNARY(Asin, a.asin(), -0.99, 0.99) +BENCH_CWISE_UNARY(Acos, a.acos(), -0.99, 0.99) BENCH_CWISE_UNARY(Atan, a.atan(), -10, 10) // Hyperbolic / special BENCH_CWISE_UNARY(Tanh, a.tanh(), -5, 5) +BENCH_CWISE_UNARY(Atanh, a.atanh(), -0.99, 0.99) BENCH_CWISE_UNARY(Erf, Eigen::erf(a), -4, 4) // Simple operations (should be very fast / memory-bound) @@ -53,6 +60,8 @@ BENCH_CWISE_UNARY(Cube, a.cube(), -10, 10) BENCH_CWISE_UNARY(Ceil, a.ceil(), -100, 100) BENCH_CWISE_UNARY(Floor, a.floor(), -100, 100) BENCH_CWISE_UNARY(Round, a.round(), -100, 100) +BENCH_CWISE_UNARY(Rint, a.rint(), -100, 100) +BENCH_CWISE_UNARY(Trunc, a.trunc(), -100, 100) // Sigmoid: 1 / (1 + exp(-x)), common in ML. 
BENCH_CWISE_UNARY(Sigmoid, Scalar(1) / (Scalar(1) + (-a).exp()), -10, 10) @@ -71,6 +80,56 @@ static void BM_Pow(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar) * 2); } +// Macro for complex unary benchmarks. Random() already produces complex +// values with real & imag in [-1,1]; scale both parts to [LO, HI]. +#define BENCH_CWISE_UNARY_COMPLEX(NAME, EXPR, LO, HI) \ + template <typename RealScalar> \ + static void BM_##NAME##_complex(benchmark::State& state) { \ + using Scalar = std::complex<RealScalar>; \ + const Index n = state.range(0); \ + using Arr = Array<Scalar, Dynamic, 1>; \ + Arr a = (Arr::Random(n) + Scalar(RealScalar(1), RealScalar(1))) * \ + Scalar(RealScalar((double(HI) - double(LO)) / 2.0), RealScalar(0)) + \ + Scalar(RealScalar(LO), RealScalar(LO)); \ + Arr b(n); \ + for (auto _ : state) { \ + b = EXPR; \ + benchmark::DoNotOptimize(b.data()); \ + } \ + state.SetBytesProcessed(state.iterations() * n * Index(sizeof(Scalar)) * 2); \ + } + +// Macro for complex binary benchmarks (e.g. multiply, divide). 
+#define BENCH_CWISE_BINARY_COMPLEX(NAME, EXPR, LO, HI) \ + template <typename RealScalar> \ + static void BM_##NAME##_complex(benchmark::State& state) { \ + using Scalar = std::complex<RealScalar>; \ + const Index n = state.range(0); \ + using Arr = Array<Scalar, Dynamic, 1>; \ + Arr a = (Arr::Random(n) + Scalar(RealScalar(1), RealScalar(1))) * \ + Scalar(RealScalar((double(HI) - double(LO)) / 2.0), RealScalar(0)) + \ + Scalar(RealScalar(LO), RealScalar(LO)); \ + Arr b = (Arr::Random(n) + Scalar(RealScalar(1), RealScalar(1))) * \ + Scalar(RealScalar((double(HI) - double(LO)) / 2.0), RealScalar(0)) + \ + Scalar(RealScalar(LO), RealScalar(LO)); \ + Arr c(n); \ + for (auto _ : state) { \ + c = EXPR; \ + benchmark::DoNotOptimize(c.data()); \ + } \ + state.SetBytesProcessed(state.iterations() * n * Index(sizeof(Scalar)) * 3); \ + } + +// Complex unary (SIMD implementations in GenericPacketMathFunctions.h) +BENCH_CWISE_UNARY_COMPLEX(Exp, a.exp(), -5, 5) +BENCH_CWISE_UNARY_COMPLEX(Log, a.log(), 0.01, 100) +BENCH_CWISE_UNARY_COMPLEX(Sqrt, a.sqrt(), -100, 100) +BENCH_CWISE_UNARY_COMPLEX(Square, a.square(), -10, 10) + +// Complex binary (pdiv_complex, pmul_complex) +BENCH_CWISE_BINARY_COMPLEX(Mul, a* b, -10, 10) +BENCH_CWISE_BINARY_COMPLEX(Div, a / b, -10, 10) + static void CwiseSizes(::benchmark::Benchmark* b) { for (int n : {1024, 4096, 16384, 65536, 262144, 1048576}) b->Arg(n); } @@ -79,14 +138,20 @@ static void CwiseSizes(::benchmark::Benchmark* b) { BENCHMARK(BM_Exp<float>)->Apply(CwiseSizes)->Name("Exp_float"); BENCHMARK(BM_Log<float>)->Apply(CwiseSizes)->Name("Log_float"); BENCHMARK(BM_Log1p<float>)->Apply(CwiseSizes)->Name("Log1p_float"); +BENCHMARK(BM_Log2<float>)->Apply(CwiseSizes)->Name("Log2_float"); BENCHMARK(BM_Sqrt<float>)->Apply(CwiseSizes)->Name("Sqrt_float"); BENCHMARK(BM_Rsqrt<float>)->Apply(CwiseSizes)->Name("Rsqrt_float"); +BENCHMARK(BM_Exp2<float>)->Apply(CwiseSizes)->Name("Exp2_float"); +BENCHMARK(BM_Expm1<float>)->Apply(CwiseSizes)->Name("Expm1_float"); +BENCHMARK(BM_Cbrt<float>)->Apply(CwiseSizes)->Name("Cbrt_float"); 
BENCHMARK(BM_Sin<float>)->Apply(CwiseSizes)->Name("Sin_float"); BENCHMARK(BM_Cos<float>)->Apply(CwiseSizes)->Name("Cos_float"); BENCHMARK(BM_Tan<float>)->Apply(CwiseSizes)->Name("Tan_float"); BENCHMARK(BM_Asin<float>)->Apply(CwiseSizes)->Name("Asin_float"); +BENCHMARK(BM_Acos<float>)->Apply(CwiseSizes)->Name("Acos_float"); BENCHMARK(BM_Atan<float>)->Apply(CwiseSizes)->Name("Atan_float"); BENCHMARK(BM_Tanh<float>)->Apply(CwiseSizes)->Name("Tanh_float"); +BENCHMARK(BM_Atanh<float>)->Apply(CwiseSizes)->Name("Atanh_float"); BENCHMARK(BM_Erf<float>)->Apply(CwiseSizes)->Name("Erf_float"); BENCHMARK(BM_Abs<float>)->Apply(CwiseSizes)->Name("Abs_float"); BENCHMARK(BM_Square<float>)->Apply(CwiseSizes)->Name("Square_float"); @@ -94,6 +159,8 @@ BENCHMARK(BM_Cube<float>)->Apply(CwiseSizes)->Name("Cube_float"); BENCHMARK(BM_Ceil<float>)->Apply(CwiseSizes)->Name("Ceil_float"); BENCHMARK(BM_Floor<float>)->Apply(CwiseSizes)->Name("Floor_float"); BENCHMARK(BM_Round<float>)->Apply(CwiseSizes)->Name("Round_float"); +BENCHMARK(BM_Rint<float>)->Apply(CwiseSizes)->Name("Rint_float"); +BENCHMARK(BM_Trunc<float>)->Apply(CwiseSizes)->Name("Trunc_float"); BENCHMARK(BM_Sigmoid<float>)->Apply(CwiseSizes)->Name("Sigmoid_float"); BENCHMARK(BM_Pow<float>)->Apply(CwiseSizes)->Name("Pow_float"); @@ -101,14 +168,20 @@ BENCHMARK(BM_Pow<float>)->Apply(CwiseSizes)->Name("Pow_float"); BENCHMARK(BM_Exp<double>)->Apply(CwiseSizes)->Name("Exp_double"); BENCHMARK(BM_Log<double>)->Apply(CwiseSizes)->Name("Log_double"); BENCHMARK(BM_Log1p<double>)->Apply(CwiseSizes)->Name("Log1p_double"); +BENCHMARK(BM_Log2<double>)->Apply(CwiseSizes)->Name("Log2_double"); BENCHMARK(BM_Sqrt<double>)->Apply(CwiseSizes)->Name("Sqrt_double"); BENCHMARK(BM_Rsqrt<double>)->Apply(CwiseSizes)->Name("Rsqrt_double"); +BENCHMARK(BM_Exp2<double>)->Apply(CwiseSizes)->Name("Exp2_double"); +BENCHMARK(BM_Expm1<double>)->Apply(CwiseSizes)->Name("Expm1_double"); +BENCHMARK(BM_Cbrt<double>)->Apply(CwiseSizes)->Name("Cbrt_double"); BENCHMARK(BM_Sin<double>)->Apply(CwiseSizes)->Name("Sin_double"); BENCHMARK(BM_Cos<double>)->Apply(CwiseSizes)->Name("Cos_double"); BENCHMARK(BM_Tan<double>)->Apply(CwiseSizes)->Name("Tan_double"); 
BENCHMARK(BM_Asin<double>)->Apply(CwiseSizes)->Name("Asin_double"); +BENCHMARK(BM_Acos<double>)->Apply(CwiseSizes)->Name("Acos_double"); BENCHMARK(BM_Atan<double>)->Apply(CwiseSizes)->Name("Atan_double"); BENCHMARK(BM_Tanh<double>)->Apply(CwiseSizes)->Name("Tanh_double"); +BENCHMARK(BM_Atanh<double>)->Apply(CwiseSizes)->Name("Atanh_double"); BENCHMARK(BM_Erf<double>)->Apply(CwiseSizes)->Name("Erf_double"); BENCHMARK(BM_Abs<double>)->Apply(CwiseSizes)->Name("Abs_double"); BENCHMARK(BM_Square<double>)->Apply(CwiseSizes)->Name("Square_double"); @@ -116,5 +189,23 @@ BENCHMARK(BM_Cube<double>)->Apply(CwiseSizes)->Name("Cube_double"); BENCHMARK(BM_Ceil<double>)->Apply(CwiseSizes)->Name("Ceil_double"); BENCHMARK(BM_Floor<double>)->Apply(CwiseSizes)->Name("Floor_double"); BENCHMARK(BM_Round<double>)->Apply(CwiseSizes)->Name("Round_double"); +BENCHMARK(BM_Rint<double>)->Apply(CwiseSizes)->Name("Rint_double"); +BENCHMARK(BM_Trunc<double>)->Apply(CwiseSizes)->Name("Trunc_double"); BENCHMARK(BM_Sigmoid<double>)->Apply(CwiseSizes)->Name("Sigmoid_double"); BENCHMARK(BM_Pow<double>)->Apply(CwiseSizes)->Name("Pow_double"); + +// --- Register complex<float> --- +BENCHMARK(BM_Exp_complex<float>)->Apply(CwiseSizes)->Name("Exp_complexf"); +BENCHMARK(BM_Log_complex<float>)->Apply(CwiseSizes)->Name("Log_complexf"); +BENCHMARK(BM_Sqrt_complex<float>)->Apply(CwiseSizes)->Name("Sqrt_complexf"); +BENCHMARK(BM_Square_complex<float>)->Apply(CwiseSizes)->Name("Square_complexf"); +BENCHMARK(BM_Mul_complex<float>)->Apply(CwiseSizes)->Name("Mul_complexf"); +BENCHMARK(BM_Div_complex<float>)->Apply(CwiseSizes)->Name("Div_complexf"); + +// --- Register complex<double> --- +BENCHMARK(BM_Exp_complex<double>)->Apply(CwiseSizes)->Name("Exp_complexd"); +BENCHMARK(BM_Log_complex<double>)->Apply(CwiseSizes)->Name("Log_complexd"); +BENCHMARK(BM_Sqrt_complex<double>)->Apply(CwiseSizes)->Name("Sqrt_complexd"); +BENCHMARK(BM_Square_complex<double>)->Apply(CwiseSizes)->Name("Square_complexd"); +BENCHMARK(BM_Mul_complex<double>)->Apply(CwiseSizes)->Name("Mul_complexd"); +BENCHMARK(BM_Div_complex<double>)->Apply(CwiseSizes)->Name("Div_complexd");