// Benchmarks for colwise/rowwise reductions and broadcasting operations. // // Tests vectorwise reductions (sum, mean, norm, minCoeff, maxCoeff) and // broadcasting arithmetic (rowwise += vec, colwise -= vec, rowwise *= vec). #include #include using namespace Eigen; // --- Colwise reductions (reduce each column to a scalar) --- template static void BM_ColwiseSum(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Matrix result(cols); for (auto _ : state) { result = m.colwise().sum(); benchmark::DoNotOptimize(result.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); } template static void BM_ColwiseMean(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Matrix result(cols); for (auto _ : state) { result = m.colwise().mean(); benchmark::DoNotOptimize(result.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); } template static void BM_ColwiseNorm(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Matrix result(cols); for (auto _ : state) { result = m.colwise().norm(); benchmark::DoNotOptimize(result.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); } template static void BM_ColwiseMinCoeff(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Matrix result(cols); for (auto _ : state) { result = m.colwise().minCoeff(); benchmark::DoNotOptimize(result.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); } template static void BM_ColwiseMaxCoeff(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Matrix result(cols); for (auto _ : state) { result = m.colwise().maxCoeff(); benchmark::DoNotOptimize(result.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); } // --- Rowwise reductions (reduce each row to a scalar) --- template static void BM_RowwiseSum(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Matrix result(rows); for (auto _ : state) { result = m.rowwise().sum(); benchmark::DoNotOptimize(result.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); } template static void BM_RowwiseNorm(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Matrix result(rows); for (auto _ : state) { result = m.rowwise().norm(); benchmark::DoNotOptimize(result.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar)); } // --- Broadcasting operations --- template static void BM_RowwiseBroadcastAdd(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; using Vec = Matrix; Mat m = Mat::Random(rows, cols); Vec v = Vec::Random(cols); for (auto _ : state) { m.noalias() = m.rowwise() + v; benchmark::DoNotOptimize(m.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2); } template static void BM_ColwiseBroadcastAdd(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; using Vec = Matrix; Mat m = Mat::Random(rows, cols); Vec v = Vec::Random(rows); for (auto _ : state) { m.noalias() = m.colwise() + v; benchmark::DoNotOptimize(m.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2); } template static void BM_RowwiseBroadcastMul(benchmark::State& state) { const Index rows = state.range(0); const Index cols = state.range(1); using Mat = Matrix; Mat m = Mat::Random(rows, cols); Array v = Array::Random(cols); for (auto _ : state) { m.array().rowwise() *= v; benchmark::DoNotOptimize(m.data()); } state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 2); } // --- Size configurations --- // clang-format off // Square matrices; tall-thin (many rows, few cols); short-wide (few rows, many cols). #define BROADCAST_SIZES \ ->Args({64, 64})->Args({128, 128})->Args({256, 256})->Args({512, 512})->Args({1024, 1024}) \ ->Args({10000, 32})->Args({32, 10000}) // --- Register: float --- BENCHMARK(BM_ColwiseSum) BROADCAST_SIZES ->Name("ColwiseSum_float"); BENCHMARK(BM_ColwiseMean) BROADCAST_SIZES ->Name("ColwiseMean_float"); BENCHMARK(BM_ColwiseNorm) BROADCAST_SIZES ->Name("ColwiseNorm_float"); BENCHMARK(BM_ColwiseMinCoeff) BROADCAST_SIZES ->Name("ColwiseMinCoeff_float"); BENCHMARK(BM_ColwiseMaxCoeff) BROADCAST_SIZES ->Name("ColwiseMaxCoeff_float"); BENCHMARK(BM_RowwiseSum) BROADCAST_SIZES ->Name("RowwiseSum_float"); BENCHMARK(BM_RowwiseNorm) BROADCAST_SIZES ->Name("RowwiseNorm_float"); BENCHMARK(BM_RowwiseBroadcastAdd) BROADCAST_SIZES ->Name("RowwiseBroadcastAdd_float"); BENCHMARK(BM_ColwiseBroadcastAdd) BROADCAST_SIZES ->Name("ColwiseBroadcastAdd_float"); BENCHMARK(BM_RowwiseBroadcastMul) BROADCAST_SIZES ->Name("RowwiseBroadcastMul_float"); // --- Register: double --- BENCHMARK(BM_ColwiseSum) BROADCAST_SIZES ->Name("ColwiseSum_double"); BENCHMARK(BM_ColwiseMean) BROADCAST_SIZES ->Name("ColwiseMean_double"); BENCHMARK(BM_ColwiseNorm) BROADCAST_SIZES ->Name("ColwiseNorm_double"); BENCHMARK(BM_ColwiseMinCoeff) BROADCAST_SIZES ->Name("ColwiseMinCoeff_double"); BENCHMARK(BM_ColwiseMaxCoeff) BROADCAST_SIZES ->Name("ColwiseMaxCoeff_double"); BENCHMARK(BM_RowwiseSum) BROADCAST_SIZES ->Name("RowwiseSum_double"); BENCHMARK(BM_RowwiseNorm) BROADCAST_SIZES ->Name("RowwiseNorm_double"); BENCHMARK(BM_RowwiseBroadcastAdd) BROADCAST_SIZES ->Name("RowwiseBroadcastAdd_double"); BENCHMARK(BM_ColwiseBroadcastAdd) BROADCAST_SIZES ->Name("ColwiseBroadcastAdd_double"); BENCHMARK(BM_RowwiseBroadcastMul) BROADCAST_SIZES ->Name("RowwiseBroadcastMul_double"); #undef BROADCAST_SIZES // clang-format on