test/gpu_context.h

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_TEST_GPU_CONTEXT_H
#define EIGEN_TEST_GPU_CONTEXT_H

// RAII context for GPU tests that use NVIDIA library APIs (cuBLAS, cuSOLVER, etc.).
// Owns a non-default CUDA stream. Library handles (cuBLAS, cuSOLVER, etc.) are added
// here by each integration phase as needed; each handle is bound to the owned stream.
//
// Usage:
//   GpuContext ctx;
//   auto buf = gpu_copy_to_device(ctx.stream, A);
//   // ... call NVIDIA library APIs using ctx.stream / ctx.cusolver ...
//   ctx.synchronize();

#include "gpu_test_helper.h"

#ifdef EIGEN_USE_GPU
#include <cusolverDn.h>

// Evaluates `expr` exactly once and treats the result as a cusolverStatus_t.
// Any value other than CUSOLVER_STATUS_SUCCESS is reported via printf (numeric
// status plus the file/line of the call site) and followed by gpu_assert(false).
// The do/while(0) wrapper makes the expansion a single statement, so the macro
// can follow an unbraced `if`.
#define CUSOLVER_CHECK(expr)                                                                    \
  do {                                                                                          \
    const cusolverStatus_t cusolver_rc_ = (expr);                                               \
    if (cusolver_rc_ == CUSOLVER_STATUS_SUCCESS) break;                                         \
    printf("cuSOLVER error %d at %s:%d\n", static_cast<int>(cusolver_rc_), __FILE__, __LINE__); \
    gpu_assert(false);                                                                          \
  } while (0)

// Execution context for GPU tests. Construction records the current device and
// its properties, creates a CUDA stream, and creates a cuSOLVER dense handle
// that is attached to that stream. Destruction releases the handle first and
// the stream second.
struct GpuContext {
  // Stream created by the constructor; null until then.
  cudaStream_t stream = nullptr;
  // cuSOLVER dense handle; the constructor binds it to `stream`.
  cusolverDnHandle_t cusolver = nullptr;

  // Copying is disabled. Since the copy operations are user-declared, the
  // compiler does not generate move operations either.
  GpuContext(const GpuContext&) = delete;
  GpuContext& operator=(const GpuContext&) = delete;

  // Every call below is wrapped in GPU_CHECK / CUSOLVER_CHECK.
  GpuContext() {
    // Look up the current device and cache its properties.
    GPU_CHECK(gpuGetDevice(&device_));
    GPU_CHECK(gpuGetDeviceProperties(&device_props_, device_));

    // The stream has to exist before the handle can be pointed at it.
    GPU_CHECK(cudaStreamCreate(&stream));
    CUSOLVER_CHECK(cusolverDnCreate(&cusolver));
    CUSOLVER_CHECK(cusolverDnSetStream(cusolver, stream));
  }

  // Tears down in reverse order of creation: handle, then stream. Null
  // members are skipped.
  ~GpuContext() {
    if (cusolver) {
      CUSOLVER_CHECK(cusolverDnDestroy(cusolver));
    }
    if (stream) {
      GPU_CHECK(cudaStreamDestroy(stream));
    }
  }

  // Blocks the calling host thread until all work queued on `stream` is done.
  void synchronize() { GPU_CHECK(cudaStreamSynchronize(stream)); }

  // Device index captured by the constructor.
  int device() const { return device_; }

  // Properties of that device, as queried by the constructor.
  const gpuDeviceProp_t& deviceProperties() const { return device_props_; }

 private:
  int device_ = 0;
  gpuDeviceProp_t device_props_;
};

#endif  // EIGEN_USE_GPU

#endif  // EIGEN_TEST_GPU_CONTEXT_H
GPU: Add library dispatch module (DeviceMatrix, cuBLAS, cuSOLVER) Add Eigen/GPU module: A standalone GPU library dispatch layer where DeviceMatrix<Scalar> operations map 1:1 to cuBLAS/cuSOLVER calls. CPU and GPU solvers coexist in the same binary with compatible syntax. Core infrastructure: - DeviceMatrix<Scalar>: RAII dense column-major GPU memory wrapper with async host transfer (fromHost/toHost) and CUDA event-based cross-stream synchronization. - GpuContext: Unified execution context owning a CUDA stream + cuBLAS handle + cuSOLVER handle. Thread-local default with explicit override via setThreadLocal(). Stream-borrowing constructor for integration. - DeviceBuffer: Typed RAII device allocation with move semantics. cuBLAS dispatch (expression syntax): - GEMM: d_C = d_A.adjoint() * d_B (cublasXgemm) - TRSM: d_X = d_A.triangularView<Lower>().solve(d_B) (cublasXtrsm) - SYMM/HEMM: d_C = d_A.selfadjointView<Lower>() * d_B (cublasXsymm) - SYRK/HERK: d_C = d_A * d_A.adjoint() (cublasXsyrk) cuSOLVER dispatch: - GpuLLT: Cached Cholesky factorization (cusolverDnXpotrf + Xpotrs) - GpuLU: Cached LU factorization (cusolverDnXgetrf + Xgetrs) - Solver chaining: auto x = d_A.llt().solve(d_B) - Solver expressions with .device(ctx) for explicit stream control. CI: Bump CUDA container to Ubuntu 22.04 (CMake 3.22), GCC 10->11, Clang 12->14. Bump cmake_minimum_required to 3.17 for FindCUDAToolkit. Tests: gpu_cublas.cpp, gpu_cusolver_llt.cpp, gpu_cusolver_lu.cpp, gpu_device_matrix.cpp, gpu_library_example.cu Benchmarks: bench_gpu_solvers.cpp, bench_gpu_chaining.cpp, bench_gpu_batching.cpp 2026-04-09 16:15:39 -07:00			`// This file is part of Eigen, a lightweight C++ template library`
			`// for linear algebra.`
			`//`
			`// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>`
			`//`
			`// This Source Code Form is subject to the terms of the Mozilla`
			`// Public License v. 2.0. If a copy of the MPL was not distributed`
			`// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.`

			`#ifndef EIGEN_TEST_GPU_CONTEXT_H`
			`#define EIGEN_TEST_GPU_CONTEXT_H`

			`// RAII context for GPU tests that use NVIDIA library APIs (cuBLAS, cuSOLVER, etc.).`
			`// Owns a non-default CUDA stream. Library handles (cuBLAS, cuSOLVER, etc.) are added`
			`// here by each integration phase as needed; each handle is bound to the owned stream.`
			`//`
			`// Usage:`
			`// GpuContext ctx;`
			`// auto buf = gpu_copy_to_device(ctx.stream, A);`
			`// // ... call NVIDIA library APIs using ctx.stream / ctx.cusolver ...`
			`// ctx.synchronize();`

			`#include "gpu_test_helper.h"`

			`#ifdef EIGEN_USE_GPU`
			`#include <cusolverDn.h>`

			`// Checks cuSOLVER return codes, aborts on failure.`
			`#define CUSOLVER_CHECK(expr) \`
			`do { \`
			`cusolverStatus_t _status = (expr); \`
			`if (_status != CUSOLVER_STATUS_SUCCESS) { \`
			`printf("cuSOLVER error %d at %s:%d\n", static_cast<int>(_status), __FILE__, __LINE__); \`
			`gpu_assert(false); \`
			`} \`
			`} while (0)`

			`struct GpuContext {`
			`cudaStream_t stream = nullptr;`
			`cusolverDnHandle_t cusolver = nullptr;`

			`GpuContext() {`
			`GPU_CHECK(gpuGetDevice(&device_));`
			`GPU_CHECK(gpuGetDeviceProperties(&device_props_, device_));`
			`GPU_CHECK(cudaStreamCreate(&stream));`
			`CUSOLVER_CHECK(cusolverDnCreate(&cusolver));`
			`CUSOLVER_CHECK(cusolverDnSetStream(cusolver, stream));`
			`}`

			`~GpuContext() {`
			`if (cusolver) CUSOLVER_CHECK(cusolverDnDestroy(cusolver));`
			`if (stream) GPU_CHECK(cudaStreamDestroy(stream));`
			`}`

			`int device() const { return device_; }`
			`const gpuDeviceProp_t& deviceProperties() const { return device_props_; }`

			`// Wait for all work submitted on this context's stream to complete.`
			`void synchronize() { GPU_CHECK(cudaStreamSynchronize(stream)); }`

			`// Non-copyable, non-movable.`
			`GpuContext(const GpuContext&) = delete;`
			`GpuContext& operator=(const GpuContext&) = delete;`

			`private:`
			`int device_ = 0;`
			`gpuDeviceProp_t device_props_;`
			`};`

			`#endif // EIGEN_USE_GPU`

			`#endif // EIGEN_TEST_GPU_CONTEXT_H`