mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Reduce GEMV and TRSM benchmark sizes for faster routine runs
libeigen/eigen!2163
Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -117,23 +117,23 @@ static void BM_GemvAdj(benchmark::State& state) {
|
||||
|
||||
static void GemvSizes(::benchmark::Benchmark* b) {
|
||||
// Square matrices: exercises balanced kernel behavior.
|
||||
for (int size : {8, 16, 32, 64, 128, 256, 512, 1024, 4096}) {
|
||||
for (int size : {8, 32, 128, 512, 1024}) {
|
||||
b->Args({size, size});
|
||||
}
|
||||
// Tall-thin (m >> n): in ColMajor kernel, the inner vectorized loop over rows
|
||||
// is long while the outer column loop is short. In RowMajor kernel (transpose),
|
||||
// there are many rows to process but short dot products.
|
||||
for (int n : {1, 4, 16, 64}) {
|
||||
for (int m : {256, 1024, 4096}) {
|
||||
if (m != n) b->Args({m, n});
|
||||
for (int n : {1, 16}) {
|
||||
for (int m : {256, 1024}) {
|
||||
b->Args({m, n});
|
||||
}
|
||||
}
|
||||
// Short-wide (m << n): in ColMajor kernel, the outer column loop is long but
|
||||
// the inner vectorized loop over rows is short. In RowMajor kernel (transpose),
|
||||
// there are few rows but long dot products.
|
||||
for (int m : {1, 4, 16, 64}) {
|
||||
for (int n : {256, 1024, 4096}) {
|
||||
if (m != n) b->Args({m, n});
|
||||
for (int m : {1, 16}) {
|
||||
for (int n : {256, 1024}) {
|
||||
b->Args({m, n});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -149,12 +149,9 @@ BENCHMARK(BM_GemvTrans<float>)->Apply(GemvSizes)->Name("GemvTrans_float");
|
||||
BENCHMARK(BM_GemvTrans<double>)->Apply(GemvSizes)->Name("GemvTrans_double");
|
||||
|
||||
// Complex types: all four variants exercise distinct kernel code paths.
|
||||
// Only cfloat is benchmarked since cdouble exercises the same paths but slower.
|
||||
|
||||
BENCHMARK(BM_Gemv<std::complex<float>>)->Apply(GemvSizes)->Name("Gemv_cfloat");
|
||||
BENCHMARK(BM_Gemv<std::complex<double>>)->Apply(GemvSizes)->Name("Gemv_cdouble");
|
||||
BENCHMARK(BM_GemvTrans<std::complex<float>>)->Apply(GemvSizes)->Name("GemvTrans_cfloat");
|
||||
BENCHMARK(BM_GemvTrans<std::complex<double>>)->Apply(GemvSizes)->Name("GemvTrans_cdouble");
|
||||
BENCHMARK(BM_GemvConj<std::complex<float>>)->Apply(GemvSizes)->Name("GemvConj_cfloat");
|
||||
BENCHMARK(BM_GemvConj<std::complex<double>>)->Apply(GemvSizes)->Name("GemvConj_cdouble");
|
||||
BENCHMARK(BM_GemvAdj<std::complex<float>>)->Apply(GemvSizes)->Name("GemvAdj_cfloat");
|
||||
BENCHMARK(BM_GemvAdj<std::complex<double>>)->Apply(GemvSizes)->Name("GemvAdj_cdouble");
|
||||
|
||||
@@ -64,36 +64,31 @@ static void BM_TRSM_Right(benchmark::State& state) {
|
||||
// ---------- Size configurations ----------
|
||||
|
||||
static void TrsvSizes(::benchmark::Benchmark* b) {
|
||||
for (int n : {32, 64, 128, 256, 512, 1024}) {
|
||||
for (int n : {32, 128, 512}) {
|
||||
b->Args({n});
|
||||
}
|
||||
}
|
||||
|
||||
static void TrsmSizes(::benchmark::Benchmark* b) {
|
||||
for (int n : {32, 64, 128, 256, 512, 1024}) {
|
||||
for (int nrhs : {1, 4, 16, 64, 256}) {
|
||||
for (int n : {64, 256, 512}) {
|
||||
for (int nrhs : {1, 16, 64}) {
|
||||
b->Args({n, nrhs});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- TRSV benchmarks ----------
|
||||
// Only Lower is benchmarked; Upper exercises the same kernel via transposed storage.
|
||||
|
||||
BENCHMARK(BM_TRSV<float, Lower>)->Apply(TrsvSizes)->Name("TRSV_float_Lower");
|
||||
BENCHMARK(BM_TRSV<float, Upper>)->Apply(TrsvSizes)->Name("TRSV_float_Upper");
|
||||
BENCHMARK(BM_TRSV<double, Lower>)->Apply(TrsvSizes)->Name("TRSV_double_Lower");
|
||||
BENCHMARK(BM_TRSV<double, Upper>)->Apply(TrsvSizes)->Name("TRSV_double_Upper");
|
||||
|
||||
// ---------- TRSM Left benchmarks ----------
|
||||
|
||||
BENCHMARK(BM_TRSM_Left<float, Lower>)->Apply(TrsmSizes)->Name("TRSM_Left_float_Lower");
|
||||
BENCHMARK(BM_TRSM_Left<float, Upper>)->Apply(TrsmSizes)->Name("TRSM_Left_float_Upper");
|
||||
BENCHMARK(BM_TRSM_Left<double, Lower>)->Apply(TrsmSizes)->Name("TRSM_Left_double_Lower");
|
||||
BENCHMARK(BM_TRSM_Left<double, Upper>)->Apply(TrsmSizes)->Name("TRSM_Left_double_Upper");
|
||||
|
||||
// ---------- TRSM Right benchmarks ----------
|
||||
|
||||
BENCHMARK(BM_TRSM_Right<float, Lower>)->Apply(TrsmSizes)->Name("TRSM_Right_float_Lower");
|
||||
BENCHMARK(BM_TRSM_Right<float, Upper>)->Apply(TrsmSizes)->Name("TRSM_Right_float_Upper");
|
||||
BENCHMARK(BM_TRSM_Right<double, Lower>)->Apply(TrsmSizes)->Name("TRSM_Right_double_Lower");
|
||||
BENCHMARK(BM_TRSM_Right<double, Upper>)->Apply(TrsmSizes)->Name("TRSM_Right_double_Upper");
|
||||
|
||||
Reference in New Issue
Block a user