// Benchmark: Eigen GEMM vs CBLAS GEMM // Requires CBLAS: compile with -DHAVE_BLAS and link -lcblas // // Based on the old bench/benchBlasGemm.cpp (removed) #include #include using namespace Eigen; #ifndef SCALAR #define SCALAR float #endif typedef SCALAR Scalar; typedef Matrix MyMatrix; static void BM_EigenGemm(benchmark::State& state) { int M = state.range(0); int N = state.range(1); int K = state.range(2); MyMatrix a = MyMatrix::Random(M, K); MyMatrix b = MyMatrix::Random(K, N); MyMatrix c = MyMatrix::Random(M, N); for (auto _ : state) { c.noalias() += a * b; benchmark::DoNotOptimize(c.data()); } state.counters["GFLOPS"] = benchmark::Counter(2.0 * M * N * K, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000); } #ifdef HAVE_BLAS extern "C" { #include } #ifdef _FLOAT #define CBLAS_GEMM cblas_sgemm #else #define CBLAS_GEMM cblas_dgemm #endif static void BM_CblasGemm(benchmark::State& state) { int M = state.range(0); int N = state.range(1); int K = state.range(2); MyMatrix a = MyMatrix::Random(M, K); MyMatrix b = MyMatrix::Random(K, N); MyMatrix c = MyMatrix::Random(M, N); Scalar alpha = 1, beta = 1; for (auto _ : state) { CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, a.data(), M, b.data(), K, beta, c.data(), M); benchmark::DoNotOptimize(c.data()); } state.counters["GFLOPS"] = benchmark::Counter(2.0 * M * N * K, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000); } #endif // clang-format off #define GEMM_SIZES \ ->Args({32, 32, 32})->Args({64, 64, 64})->Args({128, 128, 128}) \ ->Args({256, 256, 256})->Args({512, 512, 512})->Args({1024, 1024, 1024})->Args({2048, 2048, 2048}) \ ->Args({1000, 100, 1000})->Args({100, 1000, 100}) BENCHMARK(BM_EigenGemm) GEMM_SIZES; #ifdef HAVE_BLAS BENCHMARK(BM_CblasGemm) GEMM_SIZES; #endif #undef GEMM_SIZES // clang-format on