Files
eigen/Eigen/GPU
Rasmus Munk Larsen 014f12f11a GPU: Add BLAS-1 ops, DeviceScalar, device-resident SpMV, and CG interop (5/5)
Add the operator interface needed for GPU iterative solvers:

- BLAS Level-1 on DeviceMatrix: dot(), norm(), squaredNorm(), setZero(),
  noalias(), operator+=/-=/\*= dispatching to cuBLAS axpy/scal/dot/nrm2.
- DeviceScalar<Scalar>: device-resident scalar returned by reductions.
  Defers host sync until value is read (implicit conversion). Device-side
  division via NPP for real types.
- GpuContext: stream-borrowing constructor, setThreadLocal(), cublasLtHandle(),
  cusparseHandle().
- GEMM upgraded from cublasGemmEx to cublasLtMatmul with heuristic algorithm
  selection and plan caching.
- GpuSparseContext: GpuContext& constructor for same-stream execution,
  deviceView() returning DeviceSparseView with operator* for device-resident
  SpMV (d_y = d_A * d_x).
- geam expressions: d_C = d_A + alpha * d_B via cublasXgeam.
- GpuSVD::matrixV() convenience wrapper.

These additions make DeviceMatrix usable as a VectorType in Eigen algorithm
templates. Conjugate gradient is the motivating example and is tested against
CPU ConjugateGradient for correctness.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 20:19:59 -07:00

73 lines
2.1 KiB
Plaintext

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_GPU_MODULE_H
#define EIGEN_GPU_MODULE_H
#include "Core"
#include "src/Core/util/DisableStupidWarnings.h"
/** \defgroup GPU_Module GPU module
*
* GPU-accelerated solvers and operations using NVIDIA CUDA libraries
* (cuSOLVER, cuBLAS, cuSPARSE, cuFFT, cuDSS).
*
* This module provides explicit GPU solver classes that coexist with Eigen's
* CPU solvers. Unlike the LAPACKE dispatch (which replaces the CPU
* implementation globally), GPU classes are separate types the user
* instantiates by choice:
*
* \code
* #define EIGEN_USE_GPU
* #include <Eigen/GPU>
*
* // CPU path (unchanged)
* Eigen::LLT<Eigen::MatrixXd> llt_cpu(A);
*
* // GPU path (explicit)
* Eigen::GpuLLT<double> llt_gpu(A); // L stays on device
* auto X = llt_gpu.solve(B); // only B transferred per solve
* \endcode
*
* Requires CUDA 11.4+. See CLAUDE.md.
*/
#ifdef EIGEN_USE_GPU
// IWYU pragma: begin_exports
#include "src/GPU/DeviceScalar.h"
#include "src/GPU/DeviceMatrix.h"
#include "src/GPU/GpuContext.h"
#include "src/GPU/DeviceExpr.h"
#include "src/GPU/DeviceBlasExpr.h"
#include "src/GPU/DeviceSolverExpr.h"
#include "src/GPU/DeviceDispatch.h"
#include "src/GPU/GpuLLT.h"
#include "src/GPU/GpuLU.h"
#include "src/GPU/GpuQR.h"
#include "src/GPU/GpuSVD.h"
#include "src/GPU/GpuEigenSolver.h"
#include "src/GPU/CuFftSupport.h"
#include "src/GPU/GpuFFT.h"
#include "src/GPU/CuSparseSupport.h"
#ifdef EIGEN_SPARSECORE_MODULE_H
#include "src/GPU/GpuSparseContext.h"
#endif
#if defined(EIGEN_CUDSS) && defined(EIGEN_SPARSECORE_MODULE_H)
#include "src/GPU/CuDssSupport.h"
#include "src/GPU/GpuSparseSolverBase.h"
#include "src/GPU/GpuSparseLLT.h"
#include "src/GPU/GpuSparseLDLT.h"
#include "src/GPU/GpuSparseLU.h"
#endif
// IWYU pragma: end_exports
#endif
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_GPU_MODULE_H