Files
eigen/benchmarks/CMakeLists.txt
Rasmus Munk Larsen 58c44ef36d GPU: Add library dispatch module (DeviceMatrix, cuBLAS, cuSOLVER)
Add Eigen/GPU module: A standalone GPU library dispatch layer where
DeviceMatrix<Scalar> operations map 1:1 to cuBLAS/cuSOLVER calls.
CPU and GPU solvers coexist in the same binary with compatible syntax.

Core infrastructure:
- DeviceMatrix<Scalar>: RAII dense column-major GPU memory wrapper with
  async host transfer (fromHost/toHost) and CUDA event-based cross-stream
  synchronization.
- GpuContext: Unified execution context owning a CUDA stream + cuBLAS
  handle + cuSOLVER handle. Thread-local default with explicit override
  via setThreadLocal(). Stream-borrowing constructor for integration.
- DeviceBuffer: Typed RAII device allocation with move semantics.

cuBLAS dispatch (expression syntax):
- GEMM: d_C = d_A.adjoint() * d_B (cublasXgemm)
- TRSM: d_X = d_A.triangularView<Lower>().solve(d_B) (cublasXtrsm)
- SYMM/HEMM: d_C = d_A.selfadjointView<Lower>() * d_B (cublasXsymm)
- SYRK/HERK: d_C = d_A * d_A.adjoint() (cublasXsyrk)

cuSOLVER dispatch:
- GpuLLT: Cached Cholesky factorization (cusolverDnXpotrf + Xpotrs)
- GpuLU: Cached LU factorization (cusolverDnXgetrf + Xgetrs)
- Solver chaining: auto x = d_A.llt().solve(d_B)
- Solver expressions with .device(ctx) for explicit stream control.

CI: Bump CUDA container to Ubuntu 22.04 (CMake 3.22), GCC 10->11,
Clang 12->14. Bump cmake_minimum_required to 3.17 for FindCUDAToolkit.

Tests: gpu_cublas.cpp, gpu_cusolver_llt.cpp, gpu_cusolver_lu.cpp,
gpu_device_matrix.cpp, gpu_library_example.cu
Benchmarks: bench_gpu_solvers.cpp, bench_gpu_chaining.cpp,
bench_gpu_batching.cpp
2026-04-09 19:05:25 -07:00

53 lines
1.7 KiB
CMake

cmake_minimum_required(VERSION 3.10)
project(EigenBenchmarks CXX)
find_package(benchmark REQUIRED)
find_package(BLAS QUIET)
# Eigen is a header-only library; find it relative to this directory.
set(EIGEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")
# Helper: add a Google Benchmark target.
# eigen_add_benchmark(name source [LIBRARIES lib1 lib2 ...] [DEFINITIONS def1 def2 ...])
function(eigen_add_benchmark name source)
cmake_parse_arguments(BENCH "" "" "LIBRARIES;DEFINITIONS" ${ARGN})
if(NOT IS_ABSOLUTE "${source}")
set(source "${CMAKE_CURRENT_SOURCE_DIR}/${source}")
endif()
add_executable(${name} ${source})
target_include_directories(${name} PRIVATE ${EIGEN_SOURCE_DIR})
target_link_libraries(${name} PRIVATE benchmark::benchmark benchmark::benchmark_main)
if(BENCH_LIBRARIES)
target_link_libraries(${name} PRIVATE ${BENCH_LIBRARIES})
endif()
target_compile_options(${name} PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/O2>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-O3>
)
target_compile_definitions(${name} PRIVATE NDEBUG)
if(BENCH_DEFINITIONS)
target_compile_definitions(${name} PRIVATE ${BENCH_DEFINITIONS})
endif()
endfunction()
add_subdirectory(Core)
add_subdirectory(Cholesky)
add_subdirectory(LU)
add_subdirectory(QR)
add_subdirectory(SVD)
add_subdirectory(Eigenvalues)
add_subdirectory(Geometry)
add_subdirectory(Sparse)
add_subdirectory(FFT)
add_subdirectory(Householder)
add_subdirectory(Solvers)
add_subdirectory(Tuning)
add_subdirectory(BLAS)
# GPU benchmarks have their own CMake project (needs CUDAToolkit).
# They can also be built standalone: cmake -B build -S benchmarks/GPU
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
add_subdirectory(GPU)
endif()