// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2026 Rasmus Munk Larsen // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // Tests for GpuSparseContext: GPU SpMV/SpMM via cuSPARSE. #define EIGEN_USE_GPU #include "main.h" #include #include using namespace Eigen; // ---- Helper: build a random sparse matrix ----------------------------------- template SparseMatrix make_sparse(Index rows, Index cols, double density = 0.1) { using SpMat = SparseMatrix; using RealScalar = typename NumTraits::Real; SpMat R(rows, cols); R.reserve(VectorXi::Constant(cols, static_cast(rows * density) + 1)); for (Index j = 0; j < cols; ++j) { for (Index i = 0; i < rows; ++i) { if ((std::rand() / double(RAND_MAX)) < density) { R.insert(i, j) = Scalar(RealScalar(std::rand() / double(RAND_MAX) - 0.5)); } } } R.makeCompressed(); return R; } // ---- SpMV: y = A * x ------------------------------------------------------- template void test_spmv(Index rows, Index cols) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; SpMat A = make_sparse(rows, cols); Vec x = Vec::Random(cols); GpuSparseContext ctx; Vec y_gpu = ctx.multiply(A, x); Vec y_cpu = A * x; RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits::epsilon(); VERIFY_IS_EQUAL(y_gpu.size(), rows); VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol); } // ---- SpMV with alpha/beta: y = alpha*A*x + beta*y --------------------------- template void test_spmv_alpha_beta(Index n) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; SpMat A = make_sparse(n, n); Vec x = Vec::Random(n); Vec y_init = Vec::Random(n); Scalar alpha(2); Scalar beta(3); Vec y_cpu = alpha * (A * x) + beta * y_init; GpuSparseContext ctx; Vec y_gpu = y_init; ctx.multiply(A, x, y_gpu, alpha, beta); RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits::epsilon(); VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol); } // ---- Transpose: y = A^T * x ------------------------------------------------ template void test_spmv_transpose(Index rows, Index cols) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; SpMat A = make_sparse(rows, cols); Vec x = Vec::Random(rows); GpuSparseContext ctx; Vec y_gpu = ctx.multiplyT(A, x); Vec y_cpu = A.transpose() * x; RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits::epsilon(); VERIFY_IS_EQUAL(y_gpu.size(), cols); VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol); } // ---- SpMM: Y = A * X (multiple RHS) ---------------------------------------- template void test_spmm(Index rows, Index cols, Index nrhs) { using SpMat = SparseMatrix; using Mat = Matrix; using RealScalar = typename NumTraits::Real; SpMat A = make_sparse(rows, cols); Mat X = Mat::Random(cols, nrhs); GpuSparseContext ctx; Mat Y_gpu = ctx.multiplyMat(A, X); Mat Y_cpu = A * X; RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits::epsilon(); VERIFY_IS_EQUAL(Y_gpu.rows(), rows); VERIFY_IS_EQUAL(Y_gpu.cols(), nrhs); VERIFY((Y_gpu - Y_cpu).norm() / (Y_cpu.norm() + RealScalar(1)) < tol); } // ---- Identity matrix: I * x = x -------------------------------------------- template void test_identity(Index n) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; // Build sparse identity. SpMat eye(n, n); eye.setIdentity(); eye.makeCompressed(); Vec x = Vec::Random(n); GpuSparseContext ctx; Vec y = ctx.multiply(eye, x); RealScalar tol = NumTraits::epsilon(); VERIFY((y - x).norm() < tol); } // ---- Context reuse ---------------------------------------------------------- template void test_reuse(Index n) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; GpuSparseContext ctx; RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits::epsilon(); for (int trial = 0; trial < 3; ++trial) { SpMat A = make_sparse(n, n); Vec x = Vec::Random(n); Vec y_gpu = ctx.multiply(A, x); Vec y_cpu = A * x; VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol); } } // ---- Empty ------------------------------------------------------------------ template void test_empty() { using SpMat = SparseMatrix; using Vec = Matrix; SpMat A(0, 0); A.makeCompressed(); Vec x(0); GpuSparseContext ctx; Vec y = ctx.multiply(A, x); VERIFY_IS_EQUAL(y.size(), 0); } // ---- DeviceMatrix SpMV (no host roundtrip) ---------------------------------- template void test_spmv_device(Index n) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; SpMat A = make_sparse(n, n); Vec x = Vec::Random(n); // Use shared GpuContext for same-stream execution. GpuContext gpu_ctx; GpuSparseContext ctx(gpu_ctx); auto d_x = DeviceMatrix::fromHost(x, gpu_ctx.stream()); DeviceMatrix d_y; ctx.multiply(A, d_x, d_y); Vec y_gpu = d_y.toHost(gpu_ctx.stream()); Vec y_cpu = A * x; RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits::epsilon(); VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol); } // ---- Expression syntax: d_y = d_A * d_x ------------------------------------ template void test_spmv_expr(Index n) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; SpMat A = make_sparse(n, n); Vec x = Vec::Random(n); GpuContext gpu_ctx; GpuSparseContext ctx(gpu_ctx); // Upload sparse matrix and create device view. auto d_A = ctx.deviceView(A); // Upload x. auto d_x = DeviceMatrix::fromHost(x, gpu_ctx.stream()); // Expression syntax: d_y = d_A * d_x DeviceMatrix d_y; d_y = d_A * d_x; // Also test with noalias(): DeviceMatrix d_tmp; d_tmp.noalias() = d_A * d_x; Vec y_gpu = d_y.toHost(gpu_ctx.stream()); Vec tmp_gpu = d_tmp.toHost(gpu_ctx.stream()); Vec y_cpu = A * x; RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits::epsilon(); VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol); VERIFY((tmp_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol); } // ---- deviceView overwrite: second view replaces first ----------------------- template void test_deviceview_overwrite(Index n) { using SpMat = SparseMatrix; using Vec = Matrix; using RealScalar = typename NumTraits::Real; SpMat A1 = make_sparse(n, n); SpMat A2 = make_sparse(n, n); // different random matrix Vec x = Vec::Random(n); GpuContext gpu_ctx; GpuSparseContext ctx(gpu_ctx); // First view: A1. auto d_A1 = ctx.deviceView(A1); auto d_x = DeviceMatrix::fromHost(x, gpu_ctx.stream()); DeviceMatrix d_y1; d_y1 = d_A1 * d_x; Vec y1_gpu = d_y1.toHost(gpu_ctx.stream()); Vec y1_cpu = A1 * x; RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits::epsilon(); VERIFY((y1_gpu - y1_cpu).norm() / (y1_cpu.norm() + RealScalar(1)) < tol); // Second view overwrites first: now uses A2. auto d_A2 = ctx.deviceView(A2); DeviceMatrix d_y2; d_y2 = d_A2 * d_x; Vec y2_gpu = d_y2.toHost(gpu_ctx.stream()); Vec y2_cpu = A2 * x; VERIFY((y2_gpu - y2_cpu).norm() / (y2_cpu.norm() + RealScalar(1)) < tol); } // ---- Per-scalar driver ------------------------------------------------------ template void test_scalar() { CALL_SUBTEST(test_spmv(64, 64)); CALL_SUBTEST(test_spmv(128, 64)); // non-square CALL_SUBTEST(test_spmv(64, 128)); // wide CALL_SUBTEST(test_spmv_alpha_beta(64)); CALL_SUBTEST(test_spmv_transpose(128, 64)); CALL_SUBTEST(test_spmm(64, 64, 4)); CALL_SUBTEST(test_identity(64)); CALL_SUBTEST(test_reuse(64)); CALL_SUBTEST(test_empty()); CALL_SUBTEST(test_spmv_device(64)); CALL_SUBTEST(test_spmv_expr(64)); CALL_SUBTEST(test_deviceview_overwrite(64)); } EIGEN_DECLARE_TEST(gpu_cusparse_spmv) { CALL_SUBTEST(test_scalar()); CALL_SUBTEST(test_scalar()); CALL_SUBTEST(test_scalar>()); CALL_SUBTEST(test_scalar>()); }