// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2026 Rasmus Munk Larsen // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_TEST_GPU_LIBRARY_TEST_HELPER_H #define EIGEN_TEST_GPU_LIBRARY_TEST_HELPER_H // Helpers for GPU tests that call NVIDIA library APIs (cuBLAS, cuSOLVER, etc.) // from the host side. Provides RAII GPU memory management and async matrix transfer. // // This is separate from gpu_common.h (element-parallel device kernels) and // gpu_test_helper.h (serialization-based device kernels). Those patterns run // user functors inside GPU kernels. This helper is for host-orchestrated tests // that call library APIs which launch their own kernels internally. // // All transfers use an explicit stream and cudaMemcpyAsync. Callers must // synchronize (ctx.synchronize() or cudaStreamSynchronize) before reading // results back on the host. #include "gpu_test_helper.h" namespace Eigen { namespace test { // RAII wrapper for GPU device memory. Prevents leaks when VERIFY macros abort. template struct GpuBuffer { Scalar* data = nullptr; Index size = 0; GpuBuffer() = default; explicit GpuBuffer(Index n) : size(n) { GPU_CHECK(gpuMalloc(reinterpret_cast(&data), n * sizeof(Scalar))); } ~GpuBuffer() { if (data) GPU_CHECK(gpuFree(data)); } // Move-only. GpuBuffer(GpuBuffer&& other) noexcept : data(other.data), size(other.size) { other.data = nullptr; other.size = 0; } GpuBuffer& operator=(GpuBuffer&& other) noexcept { if (this != &other) { if (data) GPU_CHECK(gpuFree(data)); data = other.data; size = other.size; other.data = nullptr; other.size = 0; } return *this; } GpuBuffer(const GpuBuffer&) = delete; GpuBuffer& operator=(const GpuBuffer&) = delete; // Async zero the buffer on the given stream. void setZeroAsync(cudaStream_t stream) { GPU_CHECK(cudaMemsetAsync(data, 0, size * sizeof(Scalar), stream)); } }; // Copy a dense Eigen matrix to a new GPU buffer, async on the given stream. // Caller must synchronize before the host matrix is freed or modified. template GpuBuffer gpu_copy_to_device(cudaStream_t stream, const MatrixBase& host_mat) { using Scalar = typename Derived::Scalar; const auto& mat = host_mat.derived(); GpuBuffer buf(mat.size()); GPU_CHECK(cudaMemcpyAsync(buf.data, mat.data(), mat.size() * sizeof(Scalar), cudaMemcpyHostToDevice, stream)); return buf; } // Copy GPU buffer contents back to a dense Eigen matrix, async on the given stream. // Caller must synchronize before reading from host_mat. template void gpu_copy_to_host(cudaStream_t stream, const GpuBuffer& buf, MatrixBase& host_mat) { auto& mat = host_mat.derived(); eigen_assert(buf.size == mat.size()); GPU_CHECK(cudaMemcpyAsync(mat.data(), buf.data, mat.size() * sizeof(Scalar), cudaMemcpyDeviceToHost, stream)); } } // namespace test } // namespace Eigen #endif // EIGEN_TEST_GPU_LIBRARY_TEST_HELPER_H