mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Add Eigen/GPU module: A standalone GPU library dispatch layer where DeviceMatrix<Scalar> operations map 1:1 to cuBLAS/cuSOLVER calls. CPU and GPU solvers coexist in the same binary with compatible syntax.

Core infrastructure:
- DeviceMatrix<Scalar>: RAII dense column-major GPU memory wrapper with async host transfer (fromHost/toHost) and CUDA event-based cross-stream synchronization.
- GpuContext: Unified execution context owning a CUDA stream + cuBLAS handle + cuSOLVER handle. Thread-local default with explicit override via setThreadLocal(). Stream-borrowing constructor for integration.
- DeviceBuffer: Typed RAII device allocation with move semantics.

cuBLAS dispatch (expression syntax):
- GEMM: d_C = d_A.adjoint() * d_B (cublasXgemm)
- TRSM: d_X = d_A.triangularView<Lower>().solve(d_B) (cublasXtrsm)
- SYMM/HEMM: d_C = d_A.selfadjointView<Lower>() * d_B (cublasXsymm)
- SYRK/HERK: d_C = d_A * d_A.adjoint() (cublasXsyrk)

cuSOLVER dispatch:
- GpuLLT: Cached Cholesky factorization (cusolverDnXpotrf + Xpotrs)
- GpuLU: Cached LU factorization (cusolverDnXgetrf + Xgetrs)
- Solver chaining: auto x = d_A.llt().solve(d_B)
- Solver expressions with .device(ctx) for explicit stream control.

CI: Bump CUDA container to Ubuntu 22.04 (CMake 3.22), GCC 10->11, Clang 12->14. Bump cmake_minimum_required to 3.17 for FindCUDAToolkit.

Tests: gpu_cublas.cpp, gpu_cusolver_llt.cpp, gpu_cusolver_lu.cpp, gpu_device_matrix.cpp, gpu_library_example.cu

Benchmarks: bench_gpu_solvers.cpp, bench_gpu_chaining.cpp, bench_gpu_batching.cpp
53 lines
1.7 KiB
CMake
53 lines
1.7 KiB
CMake
# CMake project for the Eigen benchmark executables (C++ only).
cmake_minimum_required(VERSION 3.10)
project(EigenBenchmarks LANGUAGES CXX)

# Google Benchmark is mandatory. BLAS is looked up quietly and is not required,
# so configuration continues when it is missing.
find_package(benchmark REQUIRED)
find_package(BLAS QUIET)

# Eigen is header-only, so all that is needed is the location of its headers:
# the parent of this directory.
set(EIGEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")

# eigen_add_benchmark(<name> <source>
#                     [LIBRARIES <lib>...]
#                     [DEFINITIONS <def>...])
#
# Creates the executable target <name> from one Google Benchmark source file.
#
#   <name>       Name of the executable target to create.
#   <source>     Benchmark source file. A relative path is resolved against the
#                CMAKE_CURRENT_SOURCE_DIR of the directory calling the function.
#   LIBRARIES    Extra libraries linked PRIVATE, in addition to
#                benchmark::benchmark and benchmark::benchmark_main.
#   DEFINITIONS  Extra PRIVATE compile definitions, in addition to NDEBUG.
#
# The target receives EIGEN_SOURCE_DIR as a private include directory. The
# optimisation flag (/O2 for MSVC, -O3 for every other compiler) and NDEBUG are
# added unconditionally, i.e. for every build configuration.
#
# Arguments that belong to neither keyword are reported with a warning rather
# than being dropped silently.
#
# Example:
#   eigen_add_benchmark(bench_foo bench_foo.cpp
#                       LIBRARIES ${BLAS_LIBRARIES}
#                       DEFINITIONS SCALAR=double)
function(eigen_add_benchmark name source)
  cmake_parse_arguments(BENCH "" "" "LIBRARIES;DEFINITIONS" ${ARGN})

  # Anything left over here is most likely a misspelt keyword. Surface it so the
  # caller notices that the intended libraries/definitions were not applied.
  # A warning (not an error) keeps previously accepted calls configuring.
  if(BENCH_UNPARSED_ARGUMENTS)
    message(WARNING
      "eigen_add_benchmark(${name}): ignoring unrecognized arguments: "
      "${BENCH_UNPARSED_ARGUMENTS}")
  endif()

  # Anchor relative source paths at the caller's source directory.
  if(NOT IS_ABSOLUTE "${source}")
    set(source "${CMAKE_CURRENT_SOURCE_DIR}/${source}")
  endif()

  add_executable(${name} ${source})
  target_include_directories(${name} PRIVATE ${EIGEN_SOURCE_DIR})
  target_link_libraries(${name} PRIVATE benchmark::benchmark benchmark::benchmark_main)
  if(BENCH_LIBRARIES)
    target_link_libraries(${name} PRIVATE ${BENCH_LIBRARIES})
  endif()

  # Benchmarks are always built optimised; pick the flag spelling per compiler.
  target_compile_options(${name} PRIVATE
    $<$<CXX_COMPILER_ID:MSVC>:/O2>
    $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-O3>
  )

  target_compile_definitions(${name} PRIVATE NDEBUG)
  if(BENCH_DEFINITIONS)
    target_compile_definitions(${name} PRIVATE ${BENCH_DEFINITIONS})
  endif()
endfunction()

# Per-module benchmark directories, added in the order listed.
foreach(bench_module IN ITEMS
    Core
    Cholesky
    LU
    QR
    SVD
    Eigenvalues
    Geometry
    Sparse
    FFT
    Householder
    Solvers
    Tuning
    BLAS)
  add_subdirectory(${bench_module})
endforeach()

# The GPU benchmarks are a CMake project of their own because they need
# CUDAToolkit; they are pulled in here only when a toolkit is detected.
# To build them standalone instead:
#   cmake -B build -S benchmarks/GPU
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
  add_subdirectory(GPU)
endif()