// Mirror of https://gitlab.com/libeigen/eigen.git (synced 2026-04-10 11:34:33 +08:00).
// 152 lines, 4.2 KiB, C++.
// Benchmarks for Eigen Tensor convolution (1D and 2D).

// Must be defined before the Tensor headers so the ThreadPoolDevice
// evaluation path is compiled in.
#define EIGEN_USE_THREADS

#include <benchmark/benchmark.h>

#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/CXX11/ThreadPool>

using namespace Eigen;

typedef float Scalar;
// --- 1D convolution ---
|
|
static void BM_Convolve1D(benchmark::State& state) {
|
|
const int input_size = state.range(0);
|
|
const int kernel_size = state.range(1);
|
|
|
|
Tensor<Scalar, 1> input(input_size);
|
|
Tensor<Scalar, 1> kernel(kernel_size);
|
|
input.setRandom();
|
|
kernel.setRandom();
|
|
|
|
Eigen::array<int, 1> dims = {0};
|
|
|
|
for (auto _ : state) {
|
|
Tensor<Scalar, 1> result = input.convolve(kernel, dims);
|
|
benchmark::DoNotOptimize(result.data());
|
|
benchmark::ClobberMemory();
|
|
}
|
|
double flops = 2.0 * (input_size - kernel_size + 1) * kernel_size;
|
|
state.counters["GFLOPS"] =
|
|
benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
|
}
|
|
|
|
// --- 2D convolution ---
|
|
static void BM_Convolve2D(benchmark::State& state) {
|
|
const int H = state.range(0);
|
|
const int W = state.range(1);
|
|
const int kH = state.range(2);
|
|
const int kW = state.range(3);
|
|
|
|
Tensor<Scalar, 2> input(H, W);
|
|
Tensor<Scalar, 2> kernel(kH, kW);
|
|
input.setRandom();
|
|
kernel.setRandom();
|
|
|
|
Eigen::array<int, 2> dims = {0, 1};
|
|
|
|
for (auto _ : state) {
|
|
Tensor<Scalar, 2> result = input.convolve(kernel, dims);
|
|
benchmark::DoNotOptimize(result.data());
|
|
benchmark::ClobberMemory();
|
|
}
|
|
double flops = 2.0 * (H - kH + 1) * (W - kW + 1) * kH * kW;
|
|
state.counters["GFLOPS"] =
|
|
benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
|
}
|
|
|
|
// --- 2D convolution with channels (rank-3: C x H x W, convolve on H,W) ---
|
|
static void BM_Convolve2D_Channels(benchmark::State& state) {
|
|
const int C = state.range(0);
|
|
const int H = state.range(1);
|
|
const int kH = state.range(2);
|
|
|
|
Tensor<Scalar, 3> input(C, H, H);
|
|
Tensor<Scalar, 2> kernel(kH, kH);
|
|
input.setRandom();
|
|
kernel.setRandom();
|
|
|
|
Eigen::array<int, 2> dims = {1, 2};
|
|
|
|
for (auto _ : state) {
|
|
Tensor<Scalar, 3> result = input.convolve(kernel, dims);
|
|
benchmark::DoNotOptimize(result.data());
|
|
benchmark::ClobberMemory();
|
|
}
|
|
int outH = H - kH + 1;
|
|
double flops = 2.0 * C * outH * outH * kH * kH;
|
|
state.counters["GFLOPS"] =
|
|
benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
|
}
|
|
|
|
// --- 2D convolution with ThreadPool ---
|
|
static void BM_Convolve2D_ThreadPool(benchmark::State& state) {
|
|
const int H = state.range(0);
|
|
const int kH = state.range(1);
|
|
const int threads = state.range(2);
|
|
|
|
Tensor<Scalar, 2> input(H, H);
|
|
Tensor<Scalar, 2> kernel(kH, kH);
|
|
Tensor<Scalar, 2> result(H - kH + 1, H - kH + 1);
|
|
input.setRandom();
|
|
kernel.setRandom();
|
|
|
|
ThreadPool tp(threads);
|
|
ThreadPoolDevice dev(&tp, threads);
|
|
|
|
Eigen::array<int, 2> dims = {0, 1};
|
|
|
|
for (auto _ : state) {
|
|
result.device(dev) = input.convolve(kernel, dims);
|
|
benchmark::DoNotOptimize(result.data());
|
|
benchmark::ClobberMemory();
|
|
}
|
|
int outH = H - kH + 1;
|
|
double flops = 2.0 * outH * outH * kH * kH;
|
|
state.counters["GFLOPS"] =
|
|
benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
|
|
state.counters["threads"] = threads;
|
|
}
|
|
|
|
// Registers every (input length, kernel length) combination for the 1D
// convolution benchmark.
static void Conv1DSizes(::benchmark::Benchmark* b) {
  for (int input : {128, 512, 2048})
    for (int kernel : {3, 5, 11}) b->Args({input, kernel});
}
// Registers square (H, W, kH, kW) combinations for the 2D convolution
// benchmark (H == W and kH == kW).
static void Conv2DSizes(::benchmark::Benchmark* b) {
  for (int side : {32, 64, 128, 224})
    for (int ksz : {3, 5, 7}) b->Args({side, side, ksz, ksz});
}
// Registers (channels, spatial size, kernel size) combinations for the
// channelized 2D convolution benchmark.
static void Conv2DChannelSizes(::benchmark::Benchmark* b) {
  for (int channels : {3, 64, 128})
    for (int spatial : {16, 32, 56})
      for (int ksz : {3, 5}) b->Args({channels, spatial, ksz});
}
// Registers (spatial size, kernel size, thread count) combinations for the
// thread-pool 2D convolution benchmark.
static void Conv2DThreadPoolSizes(::benchmark::Benchmark* b) {
  for (int spatial : {64, 128, 224})
    for (int ksz : {3, 5})
      for (int nthreads : {2, 4, 8}) b->Args({spatial, ksz, nthreads});
}
BENCHMARK(BM_Convolve1D)->Apply(Conv1DSizes);
BENCHMARK(BM_Convolve2D)->Apply(Conv2DSizes);
BENCHMARK(BM_Convolve2D_Channels)->Apply(Conv2DChannelSizes);
// The work runs on pool threads, so measure wall-clock time: the default
// metric (main-thread CPU time) would not reflect the parallel speedup.
BENCHMARK(BM_Convolve2D_ThreadPool)->Apply(Conv2DThreadPoolSizes)->UseRealTime();