// Benchmarks for full reductions: sum, prod, minCoeff, maxCoeff, mean, // norm, squaredNorm, lpNorm<1>, lpNorm. // // These are memory-bandwidth-bound for large vectors, so we report // bytes processed rather than FLOPS. #include #include using namespace Eigen; // --- Vector reductions (1-D) --- template static void BM_VectorSum(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar s = v.sum(); benchmark::DoNotOptimize(s); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorProd(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Constant(n, Scalar(1)); // Use values near 1 to avoid overflow/underflow. v += Scalar(0.001) * Matrix::Random(n); for (auto _ : state) { Scalar p = v.prod(); benchmark::DoNotOptimize(p); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorMinCoeff(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar m = v.minCoeff(); benchmark::DoNotOptimize(m); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorMaxCoeff(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar m = v.maxCoeff(); benchmark::DoNotOptimize(m); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorMean(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar m = v.mean(); benchmark::DoNotOptimize(m); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorSquaredNorm(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar s = v.squaredNorm(); benchmark::DoNotOptimize(s); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorNorm(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar s = v.norm(); benchmark::DoNotOptimize(s); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorLpNorm1(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar s = v.template lpNorm<1>(); benchmark::DoNotOptimize(s); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } template static void BM_VectorLpNormInf(benchmark::State& state) { const Index n = state.range(0); Matrix v = Matrix::Random(n); for (auto _ : state) { Scalar s = v.template lpNorm(); benchmark::DoNotOptimize(s); } state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar)); } // --- Matrix reductions (2-D) --- template static void BM_MatrixSum(benchmark::State& state) { const Index n = state.range(0); Matrix m = Matrix::Random(n, n); for (auto _ : state) { Scalar s = m.sum(); benchmark::DoNotOptimize(s); } state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar)); } template static void BM_MatrixNorm(benchmark::State& state) { const Index n = state.range(0); Matrix m = Matrix::Random(n, n); for (auto _ : state) { Scalar s = m.norm(); benchmark::DoNotOptimize(s); } state.SetBytesProcessed(state.iterations() * n * n * sizeof(Scalar)); } // --- Size configurations --- // clang-format off #define VECTOR_SIZES ->Arg(64)->Arg(256)->Arg(1024)->Arg(4096)->Arg(16384)->Arg(65536)->Arg(262144)->Arg(1048576) #define MATRIX_SIZES ->Arg(8)->Arg(32)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024) // --- Register: float --- BENCHMARK(BM_VectorSum) VECTOR_SIZES ->Name("VectorSum_float"); BENCHMARK(BM_VectorProd) VECTOR_SIZES ->Name("VectorProd_float"); BENCHMARK(BM_VectorMinCoeff) VECTOR_SIZES ->Name("VectorMinCoeff_float"); BENCHMARK(BM_VectorMaxCoeff) VECTOR_SIZES ->Name("VectorMaxCoeff_float"); BENCHMARK(BM_VectorMean) VECTOR_SIZES ->Name("VectorMean_float"); BENCHMARK(BM_VectorSquaredNorm) VECTOR_SIZES ->Name("VectorSquaredNorm_float"); BENCHMARK(BM_VectorNorm) VECTOR_SIZES ->Name("VectorNorm_float"); BENCHMARK(BM_VectorLpNorm1) VECTOR_SIZES ->Name("VectorLpNorm1_float"); BENCHMARK(BM_VectorLpNormInf) VECTOR_SIZES ->Name("VectorLpNormInf_float"); BENCHMARK(BM_MatrixSum) MATRIX_SIZES ->Name("MatrixSum_float"); BENCHMARK(BM_MatrixNorm) MATRIX_SIZES ->Name("MatrixNorm_float"); // --- Register: double --- BENCHMARK(BM_VectorSum) VECTOR_SIZES ->Name("VectorSum_double"); BENCHMARK(BM_VectorProd) VECTOR_SIZES ->Name("VectorProd_double"); BENCHMARK(BM_VectorMinCoeff) VECTOR_SIZES ->Name("VectorMinCoeff_double"); BENCHMARK(BM_VectorMaxCoeff) VECTOR_SIZES ->Name("VectorMaxCoeff_double"); BENCHMARK(BM_VectorMean) VECTOR_SIZES ->Name("VectorMean_double"); BENCHMARK(BM_VectorSquaredNorm) VECTOR_SIZES ->Name("VectorSquaredNorm_double"); BENCHMARK(BM_VectorNorm) VECTOR_SIZES ->Name("VectorNorm_double"); BENCHMARK(BM_VectorLpNorm1) VECTOR_SIZES ->Name("VectorLpNorm1_double"); BENCHMARK(BM_VectorLpNormInf) VECTOR_SIZES ->Name("VectorLpNormInf_double"); BENCHMARK(BM_MatrixSum) MATRIX_SIZES ->Name("MatrixSum_double"); BENCHMARK(BM_MatrixNorm) MATRIX_SIZES ->Name("MatrixNorm_double"); #undef VECTOR_SIZES #undef MATRIX_SIZES // clang-format on