test/gpu_cudss_llt.cpp

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

// Tests for GpuSparseLLT: GPU sparse Cholesky via cuDSS.

#define EIGEN_USE_GPU
#include "main.h"
#include <Eigen/Sparse>
#include <Eigen/GPU>

using namespace Eigen;

// ---- Helper: build a random sparse SPD matrix -------------------------------

template <typename Scalar>
SparseMatrix<Scalar, ColMajor, int> make_spd(Index n, double density = 0.1) {
  using SpMat = SparseMatrix<Scalar, ColMajor, int>;
  using RealScalar = typename NumTraits<Scalar>::Real;

  // Uses the global std::rand state seeded by the test framework (g_seed).
  SpMat R(n, n);
  R.reserve(VectorXi::Constant(n, static_cast<int>(n * density) + 1));
  for (Index j = 0; j < n; ++j) {
    for (Index i = 0; i < n; ++i) {
      if (i == j || (std::rand() / double(RAND_MAX)) < density) {
        R.insert(i, j) = Scalar(std::rand() / double(RAND_MAX) - 0.5);
      }
    }
  }
  R.makeCompressed();

  // A = R^H * R + n * I  (guaranteed SPD).
  SpMat A = R.adjoint() * R;
  for (Index i = 0; i < n; ++i) A.coeffRef(i, i) += Scalar(RealScalar(n));
  A.makeCompressed();
  return A;
}

// ---- Solve and check residual -----------------------------------------------

template <typename Scalar>
void test_solve(Index n) {
  using SpMat = SparseMatrix<Scalar, ColMajor, int>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  using RealScalar = typename NumTraits<Scalar>::Real;

  SpMat A = make_spd<Scalar>(n);
  Vec b = Vec::Random(n);

  GpuSparseLLT<Scalar> llt(A);
  VERIFY_IS_EQUAL(llt.info(), Success);

  Vec x = llt.solve(b);
  VERIFY_IS_EQUAL(x.rows(), n);

  // Check residual: ||Ax - b|| / ||b||.
  Vec r = A * x - b;
  RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();
  VERIFY(r.norm() / b.norm() < tol);
}

// ---- Compare with CPU SimplicialLLT -----------------------------------------

template <typename Scalar>
void test_vs_cpu(Index n) {
  using SpMat = SparseMatrix<Scalar, ColMajor, int>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  using RealScalar = typename NumTraits<Scalar>::Real;

  SpMat A = make_spd<Scalar>(n);
  Vec b = Vec::Random(n);

  GpuSparseLLT<Scalar> gpu_llt(A);
  VERIFY_IS_EQUAL(gpu_llt.info(), Success);
  Vec x_gpu = gpu_llt.solve(b);

  SimplicialLLT<SpMat> cpu_llt(A);
  VERIFY_IS_EQUAL(cpu_llt.info(), Success);
  Vec x_cpu = cpu_llt.solve(b);

  RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();
  VERIFY((x_gpu - x_cpu).norm() / x_cpu.norm() < tol);
}

// ---- Multiple RHS -----------------------------------------------------------

template <typename Scalar>
void test_multiple_rhs(Index n, Index nrhs) {
  using SpMat = SparseMatrix<Scalar, ColMajor, int>;
  using Mat = Matrix<Scalar, Dynamic, Dynamic>;
  using RealScalar = typename NumTraits<Scalar>::Real;

  SpMat A = make_spd<Scalar>(n);
  Mat B = Mat::Random(n, nrhs);

  GpuSparseLLT<Scalar> llt(A);
  VERIFY_IS_EQUAL(llt.info(), Success);

  Mat X = llt.solve(B);
  VERIFY_IS_EQUAL(X.rows(), n);
  VERIFY_IS_EQUAL(X.cols(), nrhs);

  Mat R = A * X - B;
  RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();
  VERIFY(R.norm() / B.norm() < tol);
}

// ---- Separate analyze + factorize (refactorization) -------------------------

template <typename Scalar>
void test_refactorize(Index n) {
  using SpMat = SparseMatrix<Scalar, ColMajor, int>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  using RealScalar = typename NumTraits<Scalar>::Real;

  SpMat A = make_spd<Scalar>(n);
  Vec b = Vec::Random(n);

  GpuSparseLLT<Scalar> llt;
  llt.analyzePattern(A);
  VERIFY_IS_EQUAL(llt.info(), Success);

  // First factorize + solve.
  llt.factorize(A);
  VERIFY_IS_EQUAL(llt.info(), Success);
  Vec x1 = llt.solve(b);

  // Modify values (keep same pattern): scale diagonal.
  SpMat A2 = A;
  for (Index i = 0; i < n; ++i) A2.coeffRef(i, i) *= Scalar(RealScalar(2));

  // Refactorize with same pattern.
  llt.factorize(A2);
  VERIFY_IS_EQUAL(llt.info(), Success);
  Vec x2 = llt.solve(b);

  // Both solutions should satisfy their respective systems.
  RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();
  VERIFY((A * x1 - b).norm() / b.norm() < tol);
  VERIFY((A2 * x2 - b).norm() / b.norm() < tol);

  // Solutions should differ (A2 != A).
  VERIFY((x1 - x2).norm() > NumTraits<Scalar>::epsilon());
}

// ---- Empty matrix -----------------------------------------------------------

void test_empty() {
  using SpMat = SparseMatrix<double, ColMajor, int>;
  SpMat A(0, 0);
  A.makeCompressed();
  GpuSparseLLT<double> llt(A);
  VERIFY_IS_EQUAL(llt.info(), Success);
  VERIFY_IS_EQUAL(llt.rows(), 0);
  VERIFY_IS_EQUAL(llt.cols(), 0);
}

// ---- Upper triangle ---------------------------------------------------------

template <typename Scalar>
void test_upper(Index n) {
  using SpMat = SparseMatrix<Scalar, ColMajor, int>;
  using Vec = Matrix<Scalar, Dynamic, 1>;
  using RealScalar = typename NumTraits<Scalar>::Real;

  SpMat A = make_spd<Scalar>(n);
  Vec b = Vec::Random(n);

  GpuSparseLLT<Scalar, Upper> llt(A);
  VERIFY_IS_EQUAL(llt.info(), Success);

  Vec x = llt.solve(b);
  Vec r = A * x - b;
  RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();
  VERIFY(r.norm() / b.norm() < tol);
}

// ---- Per-scalar driver ------------------------------------------------------

template <typename Scalar>
void test_scalar() {
  CALL_SUBTEST(test_solve<Scalar>(64));
  CALL_SUBTEST(test_solve<Scalar>(256));
  CALL_SUBTEST(test_vs_cpu<Scalar>(64));
  CALL_SUBTEST(test_multiple_rhs<Scalar>(64, 4));
  CALL_SUBTEST(test_refactorize<Scalar>(64));
  CALL_SUBTEST(test_upper<Scalar>(64));
}

EIGEN_DECLARE_TEST(gpu_cudss_llt) {
  CALL_SUBTEST(test_scalar<float>());
  CALL_SUBTEST(test_scalar<double>());
  CALL_SUBTEST(test_scalar<std::complex<float>>());
  CALL_SUBTEST(test_scalar<std::complex<double>>());
  CALL_SUBTEST(test_empty());
}
GPU: Add sparse solvers, FFT, and SpMV (cuDSS, cuFFT, cuSPARSE) Add GPU sparse direct solvers (Cholesky, LDL^T, LU) via cuDSS, 1D/2D FFT via cuFFT with plan caching, and sparse matrix-vector/matrix multiply (SpMV/SpMM) via cuSPARSE. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-09 19:11:49 -07:00			`// This file is part of Eigen, a lightweight C++ template library`
			`// for linear algebra.`
			`//`
			`// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>`
			`//`
			`// This Source Code Form is subject to the terms of the Mozilla`
			`// Public License v. 2.0. If a copy of the MPL was not distributed`
			`// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.`

			`// Tests for GpuSparseLLT: GPU sparse Cholesky via cuDSS.`

			`#define EIGEN_USE_GPU`
			`#include "main.h"`
			`#include <Eigen/Sparse>`
			`#include <Eigen/GPU>`

			`using namespace Eigen;`

			`// ---- Helper: build a random sparse SPD matrix -------------------------------`

			`template <typename Scalar>`
			`SparseMatrix<Scalar, ColMajor, int> make_spd(Index n, double density = 0.1) {`
			`using SpMat = SparseMatrix<Scalar, ColMajor, int>;`
			`using RealScalar = typename NumTraits<Scalar>::Real;`

			`// Uses the global std::rand state seeded by the test framework (g_seed).`
			`SpMat R(n, n);`
			`R.reserve(VectorXi::Constant(n, static_cast<int>(n * density) + 1));`
			`for (Index j = 0; j < n; ++j) {`
			`for (Index i = 0; i < n; ++i) {`
			`if (i == j \|\| (std::rand() / double(RAND_MAX)) < density) {`
			`R.insert(i, j) = Scalar(std::rand() / double(RAND_MAX) - 0.5);`
			`}`
			`}`
			`}`
			`R.makeCompressed();`

			`// A = R^H * R + n * I (guaranteed SPD).`
			`SpMat A = R.adjoint() * R;`
			`for (Index i = 0; i < n; ++i) A.coeffRef(i, i) += Scalar(RealScalar(n));`
			`A.makeCompressed();`
			`return A;`
			`}`

			`// ---- Solve and check residual -----------------------------------------------`

			`template <typename Scalar>`
			`void test_solve(Index n) {`
			`using SpMat = SparseMatrix<Scalar, ColMajor, int>;`
			`using Vec = Matrix<Scalar, Dynamic, 1>;`
			`using RealScalar = typename NumTraits<Scalar>::Real;`

			`SpMat A = make_spd<Scalar>(n);`
			`Vec b = Vec::Random(n);`

			`GpuSparseLLT<Scalar> llt(A);`
			`VERIFY_IS_EQUAL(llt.info(), Success);`

			`Vec x = llt.solve(b);`
			`VERIFY_IS_EQUAL(x.rows(), n);`

			`// Check residual: \|\|Ax - b\|\| / \|\|b\|\|.`
			`Vec r = A * x - b;`
			`RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();`
			`VERIFY(r.norm() / b.norm() < tol);`
			`}`

			`// ---- Compare with CPU SimplicialLLT -----------------------------------------`

			`template <typename Scalar>`
			`void test_vs_cpu(Index n) {`
			`using SpMat = SparseMatrix<Scalar, ColMajor, int>;`
			`using Vec = Matrix<Scalar, Dynamic, 1>;`
			`using RealScalar = typename NumTraits<Scalar>::Real;`

			`SpMat A = make_spd<Scalar>(n);`
			`Vec b = Vec::Random(n);`

			`GpuSparseLLT<Scalar> gpu_llt(A);`
			`VERIFY_IS_EQUAL(gpu_llt.info(), Success);`
			`Vec x_gpu = gpu_llt.solve(b);`

			`SimplicialLLT<SpMat> cpu_llt(A);`
			`VERIFY_IS_EQUAL(cpu_llt.info(), Success);`
			`Vec x_cpu = cpu_llt.solve(b);`

			`RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();`
			`VERIFY((x_gpu - x_cpu).norm() / x_cpu.norm() < tol);`
			`}`

			`// ---- Multiple RHS -----------------------------------------------------------`

			`template <typename Scalar>`
			`void test_multiple_rhs(Index n, Index nrhs) {`
			`using SpMat = SparseMatrix<Scalar, ColMajor, int>;`
			`using Mat = Matrix<Scalar, Dynamic, Dynamic>;`
			`using RealScalar = typename NumTraits<Scalar>::Real;`

			`SpMat A = make_spd<Scalar>(n);`
			`Mat B = Mat::Random(n, nrhs);`

			`GpuSparseLLT<Scalar> llt(A);`
			`VERIFY_IS_EQUAL(llt.info(), Success);`

			`Mat X = llt.solve(B);`
			`VERIFY_IS_EQUAL(X.rows(), n);`
			`VERIFY_IS_EQUAL(X.cols(), nrhs);`

			`Mat R = A * X - B;`
			`RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();`
			`VERIFY(R.norm() / B.norm() < tol);`
			`}`

			`// ---- Separate analyze + factorize (refactorization) -------------------------`

			`template <typename Scalar>`
			`void test_refactorize(Index n) {`
			`using SpMat = SparseMatrix<Scalar, ColMajor, int>;`
			`using Vec = Matrix<Scalar, Dynamic, 1>;`
			`using RealScalar = typename NumTraits<Scalar>::Real;`

			`SpMat A = make_spd<Scalar>(n);`
			`Vec b = Vec::Random(n);`

			`GpuSparseLLT<Scalar> llt;`
			`llt.analyzePattern(A);`
			`VERIFY_IS_EQUAL(llt.info(), Success);`

			`// First factorize + solve.`
			`llt.factorize(A);`
			`VERIFY_IS_EQUAL(llt.info(), Success);`
			`Vec x1 = llt.solve(b);`

			`// Modify values (keep same pattern): scale diagonal.`
			`SpMat A2 = A;`
			`for (Index i = 0; i < n; ++i) A2.coeffRef(i, i) *= Scalar(RealScalar(2));`

			`// Refactorize with same pattern.`
			`llt.factorize(A2);`
			`VERIFY_IS_EQUAL(llt.info(), Success);`
			`Vec x2 = llt.solve(b);`

			`// Both solutions should satisfy their respective systems.`
			`RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();`
			`VERIFY((A * x1 - b).norm() / b.norm() < tol);`
			`VERIFY((A2 * x2 - b).norm() / b.norm() < tol);`

			`// Solutions should differ (A2 != A).`
			`VERIFY((x1 - x2).norm() > NumTraits<Scalar>::epsilon());`
			`}`

			`// ---- Empty matrix -----------------------------------------------------------`

			`void test_empty() {`
			`using SpMat = SparseMatrix<double, ColMajor, int>;`
			`SpMat A(0, 0);`
			`A.makeCompressed();`
			`GpuSparseLLT<double> llt(A);`
			`VERIFY_IS_EQUAL(llt.info(), Success);`
			`VERIFY_IS_EQUAL(llt.rows(), 0);`
			`VERIFY_IS_EQUAL(llt.cols(), 0);`
			`}`

			`// ---- Upper triangle ---------------------------------------------------------`

			`template <typename Scalar>`
			`void test_upper(Index n) {`
			`using SpMat = SparseMatrix<Scalar, ColMajor, int>;`
			`using Vec = Matrix<Scalar, Dynamic, 1>;`
			`using RealScalar = typename NumTraits<Scalar>::Real;`

			`SpMat A = make_spd<Scalar>(n);`
			`Vec b = Vec::Random(n);`

			`GpuSparseLLT<Scalar, Upper> llt(A);`
			`VERIFY_IS_EQUAL(llt.info(), Success);`

			`Vec x = llt.solve(b);`
			`Vec r = A * x - b;`
			`RealScalar tol = RealScalar(100) * RealScalar(n) * NumTraits<Scalar>::epsilon();`
			`VERIFY(r.norm() / b.norm() < tol);`
			`}`

			`// ---- Per-scalar driver ------------------------------------------------------`

			`template <typename Scalar>`
			`void test_scalar() {`
			`CALL_SUBTEST(test_solve<Scalar>(64));`
			`CALL_SUBTEST(test_solve<Scalar>(256));`
			`CALL_SUBTEST(test_vs_cpu<Scalar>(64));`
			`CALL_SUBTEST(test_multiple_rhs<Scalar>(64, 4));`
			`CALL_SUBTEST(test_refactorize<Scalar>(64));`
			`CALL_SUBTEST(test_upper<Scalar>(64));`
			`}`

			`EIGEN_DECLARE_TEST(gpu_cudss_llt) {`
			`CALL_SUBTEST(test_scalar<float>());`
			`CALL_SUBTEST(test_scalar<double>());`
			`CALL_SUBTEST(test_scalar<std::complex<float>>());`
			`CALL_SUBTEST(test_scalar<std::complex<double>>());`
			`CALL_SUBTEST(test_empty());`
			`}`