// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2026 Rasmus Munk Larsen // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // Smoke test for GPU library test infrastructure. // Verifies GpuContext, GpuBuffer, and host<->device matrix transfers // without requiring any NVIDIA library (cuBLAS, cuSOLVER, etc.). #define EIGEN_USE_GPU #include "main.h" #include "gpu_context.h" #include "gpu_library_test_helper.h" using namespace Eigen; using namespace Eigen::test; // Test that GpuContext initializes, reports valid device info, and owns a cuSOLVER handle. void test_gpu_context() { GpuContext ctx; VERIFY(ctx.device() >= 0); VERIFY(ctx.deviceProperties().major >= 7); // sm_70 minimum VERIFY(ctx.stream != nullptr); VERIFY(ctx.cusolver != nullptr); std::cout << " GPU: " << ctx.deviceProperties().name << " (sm_" << ctx.deviceProperties().major << ctx.deviceProperties().minor << ")\n"; } // Test dense matrix roundtrip: host -> device -> host. template void test_dense_roundtrip() { GpuContext ctx; const Index rows = 64; const Index cols = 32; MatrixType A = MatrixType::Random(rows, cols); auto buf = gpu_copy_to_device(ctx.stream, A); VERIFY(buf.data != nullptr); VERIFY(buf.size == rows * cols); MatrixType B(rows, cols); B.setZero(); gpu_copy_to_host(ctx.stream, buf, B); ctx.synchronize(); VERIFY_IS_EQUAL(A, B); } // Test GpuBuffer RAII: move semantics, async zero-init. void test_gpu_buffer() { GpuContext ctx; GpuBuffer a(128); VERIFY(a.data != nullptr); VERIFY(a.size == 128); // Move construction. GpuBuffer b(std::move(a)); VERIFY(a.data == nullptr); VERIFY(b.data != nullptr); VERIFY(b.size == 128); // Move assignment. GpuBuffer c; c = std::move(b); VERIFY(b.data == nullptr); VERIFY(c.data != nullptr); // setZeroAsync. c.setZeroAsync(ctx.stream); ctx.synchronize(); std::vector host(128); GPU_CHECK(cudaMemcpy(host.data(), c.data, 128 * sizeof(float), cudaMemcpyDeviceToHost)); for (int i = 0; i < 128; ++i) { VERIFY_IS_EQUAL(host[i], 0.0f); } } // Test with vectors (1D). template void test_vector_roundtrip() { GpuContext ctx; const Index n = 256; Matrix v = Matrix::Random(n); auto buf = gpu_copy_to_device(ctx.stream, v); Matrix w(n); w.setZero(); gpu_copy_to_host(ctx.stream, buf, w); ctx.synchronize(); VERIFY_IS_EQUAL(v, w); } EIGEN_DECLARE_TEST(gpu_library_example) { CALL_SUBTEST(test_gpu_context()); CALL_SUBTEST(test_gpu_buffer()); CALL_SUBTEST(test_dense_roundtrip()); CALL_SUBTEST(test_dense_roundtrip()); CALL_SUBTEST((test_dense_roundtrip>())); CALL_SUBTEST((test_dense_roundtrip>())); CALL_SUBTEST(test_vector_roundtrip()); CALL_SUBTEST(test_vector_roundtrip()); CALL_SUBTEST(test_vector_roundtrip>()); }