// Mirror of https://gitlab.com/libeigen/eigen.git
// (synced 2026-04-10 11:34:33 +08:00).
// 132 lines, 4.1 KiB, C++.
|
|
// Benchmarks for Eigen Tensor coefficient-wise operations.
|
||
|
|
// Covers activation functions, normalization, and element-wise arithmetic.
|
||
|
|
|
||
|
|
#include <benchmark/benchmark.h>
|
||
|
|
#include <unsupported/Eigen/CXX11/Tensor>
|
||
|
|
|
||
|
|
using namespace Eigen;
|
||
|
|
|
||
|
|
// Element type of every tensor benchmarked in this file.
using Scalar = float;
|
||
|
|
|
||
|
|
// Generates a benchmark function named BM_<NAME> that times a single unary
// coefficient-wise tensor expression.
//
//   NAME  suffix appended to "BM_" to form the generated function's name.
//   EXPR  expression evaluated once per iteration. It is written in terms of
//         the rank-2 input tensor `a` declared in the body below, and its
//         result is assigned to a second rank-2 tensor of the same shape.
//
// The two tensor dimensions are taken from state.range(0) and state.range(1).
// `a` is filled once with setRandom() before the timed loop starts.
// Bytes processed are reported as 2 Scalars per coefficient per iteration.
#define BENCH_TENSOR_UNARY(NAME, EXPR)                                         \
  static void BM_##NAME(benchmark::State& state) {                             \
    const int rows = static_cast<int>(state.range(0));                         \
    const int cols = static_cast<int>(state.range(1));                         \
    /* EXPR refers to the input by the name `a`; do not rename it. */          \
    Tensor<Scalar, 2> a(rows, cols);                                           \
    a.setRandom();                                                             \
    Tensor<Scalar, 2> result(rows, cols);                                      \
    for (auto _ : state) {                                                     \
      result = EXPR;                                                           \
      /* Keep the output observable so the assignment is not elided. */        \
      benchmark::DoNotOptimize(result.data());                                 \
      benchmark::ClobberMemory();                                              \
    }                                                                          \
    state.SetBytesProcessed(state.iterations() * rows * cols *                 \
                            sizeof(Scalar) * 2);                               \
  }
|
||
|
|
|
||
|
|
// Unary benchmarks instantiated through BENCH_TENSOR_UNARY. Each expression is
// written against the input tensor `a` that the macro body declares.
BENCH_TENSOR_UNARY(Exp, a.exp())
// abs() is applied before log() so the logarithm never sees a negative input.
BENCH_TENSOR_UNARY(Log, a.abs().log())
BENCH_TENSOR_UNARY(Tanh, a.tanh())
BENCH_TENSOR_UNARY(Sigmoid, a.sigmoid())
// ReLU written as a coefficient-wise maximum against zero.
BENCH_TENSOR_UNARY(ReLU, a.cwiseMax(static_cast<Scalar>(0)))
// abs() is applied before sqrt() so the square root never sees a negative input.
BENCH_TENSOR_UNARY(Sqrt, a.abs().sqrt())
|
||
|
|
|
||
|
|
// --- Element-wise binary operations ---
|
||
|
|
// Times coefficient-wise addition of two rank-2 Scalar tensors.
// The tensor dimensions come from state.range(0) and state.range(1).
static void BM_Add(benchmark::State& state) {
  using Tensor2D = Tensor<Scalar, 2>;

  const int rows = static_cast<int>(state.range(0));
  const int cols = static_cast<int>(state.range(1));

  Tensor2D lhs(rows, cols);
  Tensor2D rhs(rows, cols);
  Tensor2D sum(rows, cols);
  lhs.setRandom();
  rhs.setRandom();

  for (auto _ : state) {
    sum = lhs + rhs;
    // Keep the output observable so the assignment is not elided.
    benchmark::DoNotOptimize(sum.data());
    benchmark::ClobberMemory();
  }

  // 3 Scalars per coefficient per iteration: two operands plus the result.
  state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 3);
}
|
||
|
|
|
||
|
|
// Times the coefficient-wise product of two rank-2 Scalar tensors.
// The tensor dimensions come from state.range(0) and state.range(1).
static void BM_Mul(benchmark::State& state) {
  using Tensor2D = Tensor<Scalar, 2>;

  const int rows = static_cast<int>(state.range(0));
  const int cols = static_cast<int>(state.range(1));

  Tensor2D lhs(rows, cols);
  Tensor2D rhs(rows, cols);
  Tensor2D product(rows, cols);
  lhs.setRandom();
  rhs.setRandom();

  for (auto _ : state) {
    product = lhs * rhs;
    // Keep the output observable so the assignment is not elided.
    benchmark::DoNotOptimize(product.data());
    benchmark::ClobberMemory();
  }

  // 3 Scalars per coefficient per iteration: two operands plus the result.
  state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 3);
}
|
||
|
|
|
||
|
|
// --- Fused multiply-add ---
|
||
|
|
// Times the coefficient-wise expression `x * y + z` on rank-2 Scalar tensors.
// The tensor dimensions come from state.range(0) and state.range(1).
static void BM_FMA(benchmark::State& state) {
  using Tensor2D = Tensor<Scalar, 2>;

  const int rows = static_cast<int>(state.range(0));
  const int cols = static_cast<int>(state.range(1));

  Tensor2D multiplicand(rows, cols);
  Tensor2D multiplier(rows, cols);
  Tensor2D addend(rows, cols);
  Tensor2D out(rows, cols);
  multiplicand.setRandom();
  multiplier.setRandom();
  addend.setRandom();

  for (auto _ : state) {
    out = multiplicand * multiplier + addend;
    // Keep the output observable so the assignment is not elided.
    benchmark::DoNotOptimize(out.data());
    benchmark::ClobberMemory();
  }

  // 4 Scalars per coefficient per iteration: three operands plus the result.
  state.SetBytesProcessed(state.iterations() * rows * cols * sizeof(Scalar) * 4);
}
|
||
|
|
|
||
|
|
// --- Rank-4 coefficient-wise (CNN feature maps) ---
|
||
|
|
// Times ReLU (coefficient-wise max against zero) on a rank-4 Scalar tensor.
// The tensor has dimensions (range(0), range(1), range(2), range(2)); the last
// argument is used for both trailing dimensions.
static void BM_ReLU_Rank4(benchmark::State& state) {
  using Tensor4D = Tensor<Scalar, 4>;

  const int batch_size = static_cast<int>(state.range(0));
  const int channels = static_cast<int>(state.range(1));
  const int side = static_cast<int>(state.range(2));

  Tensor4D input(batch_size, channels, side, side);
  Tensor4D activated(batch_size, channels, side, side);
  input.setRandom();

  for (auto _ : state) {
    activated = input.cwiseMax(static_cast<Scalar>(0));
    // Keep the output observable so the assignment is not elided.
    benchmark::DoNotOptimize(activated.data());
    benchmark::ClobberMemory();
  }

  // 2 Scalars per coefficient per iteration: one input plus one output.
  state.SetBytesProcessed(state.iterations() * batch_size * channels * side * side * sizeof(Scalar) * 2);
}
|
||
|
|
|
||
|
|
// Registers the argument sets for the rank-2 benchmarks: one square
// (dim, dim) pair for each entry in the table below.
static void CwiseSizes(::benchmark::Benchmark* b) {
  static const int kSquareDims[] = {256, 1024};
  for (const int dim : kSquareDims) {
    b->Args({dim, dim});
  }
}
|
||
|
|
|
||
|
|
// Registers the argument triples for the rank-4 benchmark. Each row is passed
// to Args() in table order as (range(0), range(1), range(2)).
static void Rank4Sizes(::benchmark::Benchmark* b) {
  static const int kShapes[][3] = {
      {32, 64, 16},
      {8, 128, 32},
      {1, 256, 64},
  };
  for (const auto& shape : kShapes) {
    b->Args({shape[0], shape[1], shape[2]});
  }
}
|
||
|
|
|
||
|
|
// Unary coefficient-wise benchmarks, run on the square rank-2 sizes.
BENCHMARK(BM_Exp)->Apply(&CwiseSizes);
BENCHMARK(BM_Log)->Apply(&CwiseSizes);
BENCHMARK(BM_Tanh)->Apply(&CwiseSizes);
BENCHMARK(BM_Sigmoid)->Apply(&CwiseSizes);
BENCHMARK(BM_ReLU)->Apply(&CwiseSizes);
BENCHMARK(BM_Sqrt)->Apply(&CwiseSizes);

// Binary and fused coefficient-wise benchmarks, same rank-2 sizes.
BENCHMARK(BM_Add)->Apply(&CwiseSizes);
BENCHMARK(BM_Mul)->Apply(&CwiseSizes);
BENCHMARK(BM_FMA)->Apply(&CwiseSizes);

// Rank-4 ReLU, run on the three-argument shapes.
BENCHMARK(BM_ReLU_Rank4)->Apply(&Rank4Sizes);
|