Files
eigen/test/CMakeLists.txt
Rasmus Munk Larsen 014f12f11a GPU: Add BLAS-1 ops, DeviceScalar, device-resident SpMV, and CG interop (5/5)
Add the operator interface needed for GPU iterative solvers:

- BLAS Level-1 on DeviceMatrix: dot(), norm(), squaredNorm(), setZero(),
  noalias(), operator+=/-=/\*= dispatching to cuBLAS axpy/scal/dot/nrm2.
- DeviceScalar<Scalar>: device-resident scalar returned by reductions.
  Defers host sync until value is read (implicit conversion). Device-side
  division via NPP for real types.
- GpuContext: stream-borrowing constructor, setThreadLocal(), cublasLtHandle(),
  cusparseHandle().
- GEMM upgraded from cublasGemmEx to cublasLtMatmul with heuristic algorithm
  selection and plan caching.
- GpuSparseContext: GpuContext& constructor for same-stream execution,
  deviceView() returning DeviceSparseView with operator* for device-resident
  SpMV (d_y = d_A * d_x).
- geam expressions: d_C = d_A + alpha * d_B via cublasXgeam.
- GpuSVD::matrixV() convenience wrapper.

These additions make DeviceMatrix usable as a VectorType in Eigen algorithm
templates. Conjugate gradient is the motivating example and is tested against
CPU ConjugateGradient for correctness.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 20:19:59 -07:00

720 lines
27 KiB
CMake

# The file split_test_helper.h was generated at first run,
# it is now included in test/
if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h)
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h)
endif()
# check if we have a Fortran compiler
include(CheckLanguage)
check_language(Fortran)
if(CMAKE_Fortran_COMPILER)
enable_language(Fortran)
set(EIGEN_Fortran_COMPILER_WORKS ON)
else()
set(EIGEN_Fortran_COMPILER_WORKS OFF)
# search for a default Lapack library to complete Eigen's one
find_package(LAPACK QUIET)
endif()
# TODO do the same for EXTERNAL_LAPACK
option(EIGEN_TEST_EXTERNAL_BLAS "Use external BLAS library for testsuite" OFF)
if(EIGEN_TEST_EXTERNAL_BLAS)
find_package(BLAS REQUIRED)
message(STATUS "BLAS_COMPILER_FLAGS: ${BLAS_COMPILER_FLAGS}")
add_definitions("-DEIGEN_USE_BLAS") # is adding ${BLAS_COMPILER_FLAGS} necessary?
list(APPEND EXTERNAL_LIBS "${BLAS_LIBRARIES}")
endif()
# configure blas/lapack (use Eigen's ones)
set(EIGEN_BLAS_LIBRARIES eigen_blas)
set(EIGEN_LAPACK_LIBRARIES eigen_lapack)
set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of realword sparse matrices contained in the specified path")
if(EIGEN_TEST_MATRIX_DIR)
if(NOT WIN32)
message(STATUS "Test realworld sparse matrices: ${EIGEN_TEST_MATRIX_DIR}")
add_definitions( -DTEST_REAL_CASES="${EIGEN_TEST_MATRIX_DIR}" )
else()
message(STATUS "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
endif()
endif()
set(SPARSE_LIBS " ")
find_package(CHOLMOD)
if(CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK)
add_definitions("-DEIGEN_CHOLMOD_SUPPORT")
include_directories(${CHOLMOD_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ")
ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ")
endif()
find_package(UMFPACK)
if(UMFPACK_FOUND AND EIGEN_BUILD_BLAS)
add_definitions("-DEIGEN_UMFPACK_SUPPORT")
include_directories(${UMFPACK_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ")
ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ")
endif()
find_package(KLU)
if(KLU_FOUND AND EIGEN_BUILD_BLAS)
add_definitions("-DEIGEN_KLU_SUPPORT")
include_directories(${KLU_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ")
ei_add_test(klu_support "" "${KLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ")
endif()
find_package(SuperLU 4.0)
if(SuperLU_FOUND AND EIGEN_BUILD_BLAS)
add_definitions("-DEIGEN_SUPERLU_SUPPORT")
include_directories(${SUPERLU_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ")
ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ")
endif()
find_package(PASTIX QUIET COMPONENTS METIS SEQ)
# check that the PASTIX found is a version without MPI
find_path(PASTIX_pastix_nompi.h_INCLUDE_DIRS
NAMES pastix_nompi.h
HINTS ${PASTIX_INCLUDE_DIRS}
)
if (NOT PASTIX_pastix_nompi.h_INCLUDE_DIRS)
message(STATUS "A version of Pastix has been found but pastix_nompi.h does not exist in the include directory."
" Because Eigen tests require a version without MPI, we disable the Pastix backend.")
endif()
if(PASTIX_FOUND AND PASTIX_pastix_nompi.h_INCLUDE_DIRS)
add_definitions("-DEIGEN_PASTIX_SUPPORT")
include_directories(${PASTIX_INCLUDE_DIRS_DEP})
if(SCOTCH_FOUND)
include_directories(${SCOTCH_INCLUDE_DIRS})
set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${SCOTCH_LIBRARIES})
elseif(METIS_FOUND)
include_directories(${METIS_INCLUDE_DIRS})
set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${METIS_LIBRARIES})
else()
ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ")
endif()
set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES_DEP} ${ORDERING_LIBRARIES})
set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES_DEP})
ei_add_property(EIGEN_TESTED_BACKENDS "PaStiX, ")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ")
endif()
if(METIS_FOUND)
add_definitions("-DEIGEN_METIS_SUPPORT")
include_directories(${METIS_INCLUDE_DIRS})
ei_add_property(EIGEN_TESTED_BACKENDS "METIS, ")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "METIS, ")
endif()
find_package(SPQR)
if(SPQR_FOUND AND CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK AND (EIGEN_Fortran_COMPILER_WORKS OR LAPACK_FOUND) )
add_definitions("-DEIGEN_SPQR_SUPPORT")
include_directories(${SPQR_INCLUDES})
set(SPQR_ALL_LIBS ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${LAPACK_LIBRARIES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${SPQR_ALL_LIBS})
ei_add_property(EIGEN_TESTED_BACKENDS "SPQR, ")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "SPQR, ")
endif()
find_package(Accelerate)
if(Accelerate_FOUND)
add_definitions("-DEIGEN_ACCELERATE_SUPPORT")
include_directories(${Accelerate_INCLUDES})
set(SPARSE_LIBS ${SPARSE_LIBS} ${Accelerate_LIBRARIES})
set(Accelerate_ALL_LIBS ${Accelerate_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "Accelerate, ")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "Accelerate, ")
endif()
option(EIGEN_TEST_NOQT "Disable Qt support in unit tests" OFF)
if(NOT EIGEN_TEST_NOQT)
find_package(Qt4)
if(QT4_FOUND)
include(${QT_USE_FILE})
ei_add_property(EIGEN_TESTED_BACKENDS "Qt4 support, ")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "Qt4 support, ")
endif()
endif()
if(TEST_LIB)
add_definitions("-DEIGEN_EXTERN_INSTANTIATIONS=1")
endif()
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
add_custom_target(BuildOfficial)
ei_add_test(clz)
ei_add_test(rand)
ei_add_test(realview)
ei_add_test(meta)
ei_add_test(maxsizevector)
ei_add_test(numext)
ei_add_test(sizeof)
ei_add_test(dynalloc)
ei_add_test(nomalloc)
ei_add_test(first_aligned)
ei_add_test(type_alias)
ei_add_test(nullary)
ei_add_test(mixingtypes)
ei_add_test(float_conversion)
ei_add_test(io)
ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
# Generic clang vector backend tests for different vector sizes.
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
typedef float v4sf __attribute__((ext_vector_type(4)));
int main() { return __builtin_vectorelements(v4sf{}); }
" COMPILER_SUPPORTS_VECTOR_EXTENSIONS)
if(COMPILER_SUPPORTS_VECTOR_EXTENSIONS)
ei_add_test(packetmath_generic_16 "-DEIGEN_FAST_MATH=1")
ei_add_test(packetmath_generic_32 "-DEIGEN_FAST_MATH=1")
ei_add_test(packetmath_generic_64 "-DEIGEN_FAST_MATH=1")
ei_add_test(mixingtypes_generic_32)
ei_add_test(mixingtypes_generic_64)
endif()
ei_add_test(packet_segment)
ei_add_test(vectorization_logic)
ei_add_test(basicstuff)
ei_add_test(constexpr)
ei_add_test(constructor)
ei_add_test(linearstructure)
ei_add_test(integer_types)
ei_add_test(unalignedcount)
if(NOT EIGEN_TEST_NO_EXCEPTIONS AND NOT EIGEN_TEST_OPENMP)
ei_add_test(exceptions)
endif()
ei_add_test(redux)
ei_add_test(visitor)
ei_add_test(block)
ei_add_test(corners)
ei_add_test(symbolic_index)
ei_add_test(indexed_view)
ei_add_test(reshape)
ei_add_test(swap)
ei_add_test(resize)
ei_add_test(no_automatic_resizing)
ei_add_test(conservative_resize)
ei_add_test(product_small)
ei_add_test(product_large)
ei_add_test(product_extra)
ei_add_test(diagonalmatrices)
ei_add_test(diagonalview)
ei_add_test(skew_symmetric_matrix3)
ei_add_test(adjoint)
ei_add_test(diagonal)
ei_add_test(miscmatrices)
ei_add_test(commainitializer)
ei_add_test(smallvectors)
ei_add_test(mapped_matrix)
ei_add_test(mapstride)
ei_add_test(unaryview)
ei_add_test(mapstaticmethods)
ei_add_test(array_cwise)
ei_add_test(matrix_cwise)
ei_add_test(array_for_matrix)
ei_add_test(array_replicate)
ei_add_test(array_reverse)
ei_add_test(ref)
ei_add_test(is_same_dense)
ei_add_test(triangular)
ei_add_test(selfadjoint)
ei_add_test(product_selfadjoint)
ei_add_test(product_symm)
ei_add_test(product_syrk)
ei_add_test(product_trmv)
ei_add_test(product_trmm)
ei_add_test(product_trsolve)
ei_add_test(product_mmtr)
ei_add_test(product_notemporary)
ei_add_test(product_threaded "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(stable_norm)
ei_add_test(permutationmatrices)
ei_add_test(bandmatrix)
ei_add_test(cholesky)
ei_add_test(condition_estimator)
ei_add_test(lu)
ei_add_test(determinant)
ei_add_test(inverse)
ei_add_test(qr)
ei_add_test(qr_colpivoting)
ei_add_test(qr_fullpivoting)
ei_add_test(upperbidiagonalization)
ei_add_test(hessenberg)
ei_add_test(schur_real)
ei_add_test(schur_complex)
ei_add_test(eigensolver_selfadjoint)
ei_add_test(eigensolver_generic)
ei_add_test(eigensolver_complex)
ei_add_test(real_qz)
ei_add_test(complex_qz)
ei_add_test(eigensolver_generalized_real)
ei_add_test(jacobi)
ei_add_test(jacobisvd)
ei_add_test(bdcsvd)
ei_add_test(householder)
ei_add_test(geo_orthomethods)
ei_add_test(geo_quaternion)
ei_add_test(geo_eulerangles)
ei_add_test(geo_parametrizedline)
ei_add_test(geo_alignedbox)
ei_add_test(geo_hyperplane)
ei_add_test(geo_transformations)
ei_add_test(geo_homogeneous)
ei_add_test(stdvector)
ei_add_test(stdvector_overload)
ei_add_test(stdlist)
ei_add_test(stdlist_overload)
ei_add_test(stddeque)
ei_add_test(stddeque_overload)
ei_add_test(sparse_basic)
ei_add_test(sparse_block)
ei_add_test(sparse_vector)
ei_add_test(sparse_product)
ei_add_test(sparse_ref)
ei_add_test(sparse_solvers)
ei_add_test(sparse_permutations)
ei_add_test(simplicial_cholesky)
ei_add_test(conjugate_gradient)
ei_add_test(incomplete_cholesky)
ei_add_test(incomplete_LUT)
ei_add_test(bicgstab)
ei_add_test(lscg)
ei_add_test(sparselu)
ei_add_test(sparseqr)
ei_add_test(umeyama)
ei_add_test(nesting_ops "${CMAKE_CXX_FLAGS_DEBUG}")
ei_add_test(nestbyvalue)
ei_add_test(zerosized)
ei_add_test(dontalign)
ei_add_test(evaluators)
if(NOT EIGEN_TEST_NO_EXCEPTIONS)
ei_add_test(sizeoverflow)
endif()
ei_add_test(prec_inverse_4x4)
ei_add_test(vectorwiseop)
ei_add_test(special_numbers)
ei_add_test(rvalue_types)
ei_add_test(dense_storage)
ei_add_test(ctorleak)
ei_add_test(inplace_decomposition)
ei_add_test(half_float)
ei_add_test(bfloat16_float)
ei_add_test(array_of_string)
ei_add_test(num_dimensions)
ei_add_test(stl_iterators)
ei_add_test(blasutil)
ei_add_test(random_matrix)
ei_add_test(initializer_list_construction)
ei_add_test(diagonal_matrix_variadic_ctor)
ei_add_test(serializer)
ei_add_test(tuple_test)
ei_add_test(threads_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(threads_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(threads_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(threads_fork_join "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
add_executable(bug1213 bug1213.cpp bug1213_main.cpp)
target_link_libraries(bug1213 Eigen3::Eigen)
check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH)
if(COMPILER_SUPPORT_FASTMATH)
set(EIGEN_FASTMATH_FLAGS "-ffast-math")
else()
check_cxx_compiler_flag("/fp:fast" COMPILER_SUPPORT_FPFAST)
if(COMPILER_SUPPORT_FPFAST)
set(EIGEN_FASTMATH_FLAGS "/fp:fast")
endif()
endif()
# The fastmath test intentionally uses NaN/infinity under -ffast-math.
# Suppress the clang warning about this being technically undefined.
check_cxx_compiler_flag("-Wno-nan-infinity-disabled" COMPILER_SUPPORT_WNO_NAN_INF)
if(COMPILER_SUPPORT_WNO_NAN_INF)
set(EIGEN_FASTMATH_FLAGS "${EIGEN_FASTMATH_FLAGS} -Wno-nan-infinity-disabled")
endif()
ei_add_test(fastmath "${EIGEN_FASTMATH_FLAGS}")
# # ei_add_test(denseLM)
if(QT4_FOUND)
ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}")
endif()
if(PARDISO_FOUND)
ei_add_test(pardiso_support "" "${PARDISO_ALL_LIBS}")
endif()
if(PASTIX_FOUND AND (SCOTCH_FOUND OR METIS_FOUND))
ei_add_test(pastix_support "" "${PASTIX_ALL_LIBS}")
endif()
if(SPQR_FOUND AND CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK)
ei_add_test(spqr_support "" "${SPQR_ALL_LIBS}")
endif()
if(METIS_FOUND)
ei_add_test(metis_support "" "${METIS_LIBRARIES}")
endif()
if(Accelerate_FOUND)
ei_add_test(accelerate_support "" "${Accelerate_ALL_LIBS}")
endif()
string(TOLOWER "${CMAKE_CXX_COMPILER}" cmake_cxx_compiler_tolower)
if(cmake_cxx_compiler_tolower MATCHES "qcc")
set(CXX_IS_QCC "ON")
endif()
ei_add_property(EIGEN_TESTING_SUMMARY "CXX: ${CMAKE_CXX_COMPILER}\n")
if(CMAKE_COMPILER_IS_GNUCXX AND NOT CXX_IS_QCC)
execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version COMMAND head -n 1 OUTPUT_VARIABLE EIGEN_CXX_VERSION_STRING OUTPUT_STRIP_TRAILING_WHITESPACE)
ei_add_property(EIGEN_TESTING_SUMMARY "CXX_VERSION: ${EIGEN_CXX_VERSION_STRING}\n")
endif()
ei_add_property(EIGEN_TESTING_SUMMARY "CXX_FLAGS: ${CMAKE_CXX_FLAGS}\n")
if (EIGEN_TEST_CUSTOM_CXX_FLAGS)
ei_add_property(EIGEN_TESTING_SUMMARY "Custom CXX flags: ${EIGEN_TEST_CUSTOM_CXX_FLAGS}\n")
endif()
ei_add_property(EIGEN_TESTING_SUMMARY "Sparse lib flags: ${SPARSE_LIBS}\n")
option(EIGEN_TEST_EIGEN2 "Run whole Eigen2 test suite against EIGEN2_SUPPORT" OFF)
mark_as_advanced(EIGEN_TEST_EIGEN2)
if(EIGEN_TEST_EIGEN2)
message(WARNING "The Eigen2 test suite has been removed")
endif()
# boost MP unit test
find_package(Boost 1.53.0 CONFIG)
if(Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS})
ei_add_test(boostmultiprec "" "${Boost_LIBRARIES}")
ei_add_property(EIGEN_TESTED_BACKENDS "Boost.Multiprecision, ")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "Boost.Multiprecision, ")
endif()
# CUDA unit tests
option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF)
option(EIGEN_TEST_CUDA_CLANG "Use clang instead of nvcc to compile the CUDA tests" OFF)
option(EIGEN_TEST_CUDA_NVC "Use nvc++ (NVHPC) instead of nvcc to compile the CUDA tests" OFF)
if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang")
message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.")
endif()
if(EIGEN_TEST_CUDA_NVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "NVHPC")
message(WARNING "EIGEN_TEST_CUDA_NVC is set, but CMAKE_CXX_COMPILER does not appear to be nvc++.")
endif()
find_package(CUDA 11.4)
if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if(EIGEN_TEST_CUDA_CLANG)
string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${GPU}")
endforeach()
string(APPEND CMAKE_CXX_FLAGS " ${EIGEN_CUDA_CXX_FLAGS}")
elseif(EIGEN_TEST_CUDA_NVC)
string(APPEND CMAKE_CXX_FLAGS " -cuda")
foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND CMAKE_CXX_FLAGS " -gpu=cc${GPU}")
endforeach()
string(APPEND CMAKE_CXX_FLAGS " ${EIGEN_CUDA_CXX_FLAGS}")
else()
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
set(NVCC_ARCH_FLAGS)
# Define an -arch=sm_<arch>, otherwise if GPU does not exactly match one of
# those in the arch list for -gencode, the kernels will fail to run with
# cudaErrorNoKernelImageForDevice
# This can happen with newer cards (e.g. sm_75) and compiling with older
# versions of nvcc (e.g. 9.2) that do not support their specific arch.
list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE)
if(EIGEN_CUDA_COMPUTE_ARCH_SIZE)
list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT)
set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}")
endif()
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
endif()
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(gpu_example)
ei_add_test(gpu_basic)
ei_add_test(gpu_library_example "" "CUDA::cusolver")
# DeviceMatrix tests: CUDA runtime + cuBLAS + cuSOLVER (for BLAS-1 ops via GpuContext).
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
add_executable(gpu_device_matrix gpu_device_matrix.cpp)
target_include_directories(gpu_device_matrix PRIVATE
"${CUDA_TOOLKIT_ROOT_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(gpu_device_matrix Eigen3::Eigen CUDA::cudart CUDA::cublas CUDA::cusolver CUDA::npps CUDA::nppc)
target_compile_definitions(gpu_device_matrix PRIVATE
EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}
EIGEN_TEST_PART_ALL=1)
add_test(NAME gpu_device_matrix COMMAND gpu_device_matrix)
add_dependencies(buildtests gpu_device_matrix)
add_dependencies(buildtests_gpu gpu_device_matrix)
set_property(TEST gpu_device_matrix APPEND PROPERTY LABELS "Official;gpu")
set_property(TEST gpu_device_matrix PROPERTY SKIP_RETURN_CODE 77)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
# Library-specific GPU tests (activated by later phases, OFF by default).
# CUDAToolkit imported targets (CUDA::cublas, etc.) are available from
# find_package(CUDAToolkit) above.
option(EIGEN_TEST_CUBLAS "Test cuBLAS integration" OFF)
if(EIGEN_TEST_CUBLAS AND TARGET CUDA::cublas)
# cuBLAS tests are plain .cpp files (no device code), like cuSOLVER tests.
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
add_executable(gpu_cublas gpu_cublas.cpp)
target_include_directories(gpu_cublas PRIVATE
"${CUDA_TOOLKIT_ROOT_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(gpu_cublas
Eigen3::Eigen CUDA::cudart CUDA::cublas CUDA::cusolver)
target_compile_definitions(gpu_cublas PRIVATE
EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}
EIGEN_TEST_PART_ALL=1)
add_test(NAME gpu_cublas COMMAND gpu_cublas)
add_dependencies(buildtests gpu_cublas)
add_dependencies(buildtests_gpu gpu_cublas)
set_property(TEST gpu_cublas APPEND PROPERTY LABELS "Official;gpu")
set_property(TEST gpu_cublas PROPERTY SKIP_RETURN_CODE 77)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
endif()
option(EIGEN_TEST_CUSOLVER "Test cuSOLVER integration" OFF)
if(EIGEN_TEST_CUSOLVER AND TARGET CUDA::cusolver)
# cuSOLVER tests are plain .cpp files: no device code, compiled by the host
# compiler and linked against CUDA runtime + cuSOLVER. This avoids NVCC
# instantiating Eigen's CPU packet operations for CUDA vector types.
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
foreach(_cusolver_test IN ITEMS gpu_cusolver_llt gpu_cusolver_lu gpu_cusolver_qr gpu_cusolver_svd gpu_cusolver_eigen)
add_executable(${_cusolver_test} ${_cusolver_test}.cpp)
target_include_directories(${_cusolver_test} PRIVATE
"${CUDA_TOOLKIT_ROOT_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(${_cusolver_test}
Eigen3::Eigen CUDA::cudart CUDA::cusolver CUDA::cublas)
target_compile_definitions(${_cusolver_test} PRIVATE
EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}
EIGEN_TEST_PART_ALL=1)
add_test(NAME ${_cusolver_test} COMMAND "${_cusolver_test}")
add_dependencies(buildtests ${_cusolver_test})
add_dependencies(buildtests_gpu ${_cusolver_test})
set_property(TEST ${_cusolver_test} APPEND PROPERTY LABELS "Official;gpu")
set_property(TEST ${_cusolver_test} PROPERTY SKIP_RETURN_CODE 77)
endforeach()
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
endif()
# cuFFT test (cuFFT is part of the CUDA toolkit — no separate option needed).
if(TARGET CUDA::cufft)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
add_executable(gpu_cufft gpu_cufft.cpp)
target_include_directories(gpu_cufft PRIVATE
"${CUDA_TOOLKIT_ROOT_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(gpu_cufft
Eigen3::Eigen CUDA::cudart CUDA::cufft CUDA::cublas)
target_compile_definitions(gpu_cufft PRIVATE
EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}
EIGEN_TEST_PART_ALL=1)
add_test(NAME gpu_cufft COMMAND gpu_cufft)
add_dependencies(buildtests gpu_cufft)
add_dependencies(buildtests_gpu gpu_cufft)
set_property(TEST gpu_cufft APPEND PROPERTY LABELS "Official;gpu")
set_property(TEST gpu_cufft PROPERTY SKIP_RETURN_CODE 77)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
endif()
# cuSPARSE SpMV test (cuSPARSE is part of the CUDA toolkit).
if(TARGET CUDA::cusparse)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
add_executable(gpu_cusparse_spmv gpu_cusparse_spmv.cpp)
target_include_directories(gpu_cusparse_spmv PRIVATE
"${CUDA_TOOLKIT_ROOT_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(gpu_cusparse_spmv
Eigen3::Eigen CUDA::cudart CUDA::cusparse CUDA::cublas CUDA::cusolver)
target_compile_definitions(gpu_cusparse_spmv PRIVATE
EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}
EIGEN_TEST_PART_ALL=1)
add_test(NAME gpu_cusparse_spmv COMMAND gpu_cusparse_spmv)
add_dependencies(buildtests gpu_cusparse_spmv)
add_dependencies(buildtests_gpu gpu_cusparse_spmv)
set_property(TEST gpu_cusparse_spmv APPEND PROPERTY LABELS "Official;gpu")
set_property(TEST gpu_cusparse_spmv PROPERTY SKIP_RETURN_CODE 77)
# End-to-end GPU CG test: Eigen's ConjugateGradient with DeviceMatrix.
add_executable(gpu_cg gpu_cg.cpp)
target_include_directories(gpu_cg PRIVATE
"${CUDA_TOOLKIT_ROOT_DIR}/include"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(gpu_cg
Eigen3::Eigen CUDA::cudart CUDA::cusparse CUDA::cublas CUDA::cusolver CUDA::npps CUDA::nppc)
target_compile_definitions(gpu_cg PRIVATE
EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}
EIGEN_TEST_PART_ALL=1)
add_test(NAME gpu_cg COMMAND gpu_cg)
add_dependencies(buildtests gpu_cg)
add_dependencies(buildtests_gpu gpu_cg)
set_property(TEST gpu_cg APPEND PROPERTY LABELS "Official;gpu")
set_property(TEST gpu_cg PROPERTY SKIP_RETURN_CODE 77)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
endif()
option(EIGEN_TEST_CUSPARSE "Test cuSPARSE integration" OFF)
if(EIGEN_TEST_CUSPARSE AND TARGET CUDA::cusparse)
ei_add_test(gpu_cusparse "" "CUDA::cusparse")
endif()
# cuDSS sparse direct solver tests.
# cuDSS is distributed separately from the CUDA Toolkit.
option(EIGEN_TEST_CUDSS "Test cuDSS sparse solver integration" OFF)
if(EIGEN_TEST_CUDSS)
find_path(CUDSS_INCLUDE_DIR cudss.h
HINTS ${CUDSS_DIR}/include ${CUDA_TOOLKIT_ROOT_DIR}/include /usr/include)
find_library(CUDSS_LIBRARY cudss
HINTS ${CUDSS_DIR}/lib ${CUDSS_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib64 /usr/lib/x86_64-linux-gnu)
if(CUDSS_INCLUDE_DIR AND CUDSS_LIBRARY)
message(STATUS "cuDSS found: ${CUDSS_LIBRARY}")
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
foreach(_cudss_test IN ITEMS gpu_cudss_llt gpu_cudss_ldlt gpu_cudss_lu)
add_executable(${_cudss_test} ${_cudss_test}.cpp)
target_include_directories(${_cudss_test} PRIVATE
"${CUDA_TOOLKIT_ROOT_DIR}/include"
"${CUDSS_INCLUDE_DIR}"
"${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(${_cudss_test}
Eigen3::Eigen CUDA::cudart CUDA::cusolver CUDA::cublas ${CUDSS_LIBRARY})
target_compile_definitions(${_cudss_test} PRIVATE
EIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}
EIGEN_TEST_PART_ALL=1
EIGEN_CUDSS=1)
add_test(NAME ${_cudss_test} COMMAND "${_cudss_test}")
add_dependencies(buildtests ${_cudss_test})
add_dependencies(buildtests_gpu ${_cudss_test})
set_property(TEST ${_cudss_test} APPEND PROPERTY LABELS "Official;gpu")
set_property(TEST ${_cudss_test} PROPERTY SKIP_RETURN_CODE 77)
endforeach()
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
else()
message(WARNING "EIGEN_TEST_CUDSS=ON but cuDSS not found. Set CUDSS_DIR.")
endif()
endif()
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
# HIP unit tests
option(EIGEN_TEST_HIP "Add HIP support." OFF)
if (EIGEN_TEST_HIP)
set(ROCM_PATH "/opt/rocm" CACHE STRING "Path to the ROCm installation.")
if (EXISTS ${ROCM_PATH}/hip)
set(HIP_PATH ${ROCM_PATH}/hip)
list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)
elseif (EXISTS ${ROCM_PATH}/lib/cmake/hip)
set(HIP_PATH ${ROCM_PATH})
list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/lib/cmake/hip)
else ()
message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but could not find the ROCm installation under ${ROCM_PATH}")
endif()
find_package(HIP REQUIRED)
if (HIP_FOUND AND HIP_VERSION VERSION_LESS "5.6")
message(FATAL_ERROR "Eigen requires ROCm/HIP >= 5.6, found ${HIP_VERSION}")
endif()
if (HIP_FOUND)
execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM)
if ((${HIP_PLATFORM} STREQUAL "hcc") OR (${HIP_PLATFORM} STREQUAL "amd"))
include_directories(${HIP_PATH}/include)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(gpu_basic)
ei_add_test(gpu_example)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia"))
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
endif()
endif()
endif()
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
include(SyclConfigureTesting)
ei_add_test(sycl_basic)
set(EIGEN_SYCL OFF)
endif()
cmake_dependent_option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen documentation" OFF "EIGEN_BUILD_DOC" OFF)
if(EIGEN_TEST_BUILD_DOCUMENTATION)
add_dependencies(buildtests doc)
endif()
# ULP accuracy measurement tool (see test/ulp_accuracy/README.md)
find_package(MPFR)
find_package(GMP)
add_executable(ulp_accuracy ulp_accuracy/ulp_accuracy.cpp)
target_compile_options(ulp_accuracy PRIVATE -pthread)
target_link_libraries(ulp_accuracy Eigen3::Eigen ${CMAKE_THREAD_LIBS_INIT} pthread)
if(MPFR_FOUND AND GMP_FOUND)
target_include_directories(ulp_accuracy PRIVATE ${MPFR_INCLUDES} ${GMP_INCLUDES})
target_link_libraries(ulp_accuracy ${MPFR_LIBRARIES} ${GMP_LIBRARIES})
target_compile_definitions(ulp_accuracy PRIVATE EIGEN_HAS_MPFR)
endif()
# Register all smoke tests
include("EigenSmokeTestList")
ei_add_smoke_tests("${ei_smoke_test_list}")