// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2026 Rasmus Munk Larsen // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // Tests for DeviceMatrix and HostTransfer: typed RAII GPU memory wrapper. // No cuSOLVER dependency — only CUDA runtime. #define EIGEN_USE_GPU #include "main.h" #include using namespace Eigen; // ---- Default construction --------------------------------------------------- void test_default_construct() { DeviceMatrix dm; VERIFY(dm.empty()); VERIFY_IS_EQUAL(dm.rows(), 0); VERIFY_IS_EQUAL(dm.cols(), 0); VERIFY(dm.data() == nullptr); VERIFY_IS_EQUAL(dm.sizeInBytes(), size_t(0)); } // ---- Allocate uninitialized ------------------------------------------------- template void test_allocate(Index rows, Index cols) { DeviceMatrix dm(rows, cols); VERIFY(!dm.empty()); VERIFY_IS_EQUAL(dm.rows(), rows); VERIFY_IS_EQUAL(dm.cols(), cols); VERIFY(dm.data() != nullptr); VERIFY_IS_EQUAL(dm.sizeInBytes(), size_t(rows) * size_t(cols) * sizeof(Scalar)); } // ---- fromHost / toHost roundtrip (synchronous) ------------------------------ template void test_roundtrip(Index rows, Index cols) { using MatrixType = Matrix; MatrixType host = MatrixType::Random(rows, cols); auto dm = DeviceMatrix::fromHost(host); VERIFY_IS_EQUAL(dm.rows(), rows); VERIFY_IS_EQUAL(dm.cols(), cols); VERIFY(!dm.empty()); MatrixType result = dm.toHost(); VERIFY_IS_EQUAL(result.rows(), rows); VERIFY_IS_EQUAL(result.cols(), cols); VERIFY_IS_APPROX(result, host); } // ---- fromHostAsync / toHostAsync roundtrip ----------------------------------- template void test_roundtrip_async(Index rows, Index cols) { using MatrixType = Matrix; MatrixType host = MatrixType::Random(rows, cols); cudaStream_t stream; EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&stream)); // Async upload from raw pointer. auto dm = DeviceMatrix::fromHostAsync(host.data(), rows, cols, stream); VERIFY_IS_EQUAL(dm.rows(), rows); VERIFY_IS_EQUAL(dm.cols(), cols); // Async download via HostTransfer future. auto transfer = dm.toHostAsync(stream); // get() blocks and returns the matrix. MatrixType result = transfer.get(); VERIFY_IS_APPROX(result, host); EIGEN_CUDA_RUNTIME_CHECK(cudaStreamDestroy(stream)); } // ---- HostTransfer::ready() and idempotent get() ----------------------------- void test_host_transfer_ready() { using MatrixType = Matrix; MatrixType host = MatrixType::Random(100, 100); auto dm = DeviceMatrix::fromHost(host); auto transfer = dm.toHostAsync(); // After get(), ready() must return true. MatrixType result = transfer.get(); VERIFY(transfer.ready()); VERIFY_IS_APPROX(result, host); // get() is idempotent. MatrixType& result2 = transfer.get(); VERIFY_IS_APPROX(result2, host); } // ---- HostTransfer move ------------------------------------------------------ void test_host_transfer_move() { using MatrixType = Matrix; MatrixType host = MatrixType::Random(50, 50); auto dm = DeviceMatrix::fromHost(host); auto transfer = dm.toHostAsync(); HostTransfer moved(std::move(transfer)); MatrixType result = moved.get(); VERIFY_IS_APPROX(result, host); } // ---- clone() produces independent copy -------------------------------------- template void test_clone(Index rows, Index cols) { using MatrixType = Matrix; MatrixType host = MatrixType::Random(rows, cols); auto dm = DeviceMatrix::fromHost(host); auto cloned = dm.clone(); // Overwrite original with different data. MatrixType other = MatrixType::Random(rows, cols); dm = DeviceMatrix::fromHost(other); // Clone still holds the original data. MatrixType clone_result = cloned.toHost(); VERIFY_IS_APPROX(clone_result, host); // Original holds the new data. MatrixType dm_result = dm.toHost(); VERIFY_IS_APPROX(dm_result, other); } // ---- Move construct --------------------------------------------------------- template void test_move_construct(Index rows, Index cols) { using MatrixType = Matrix; MatrixType host = MatrixType::Random(rows, cols); auto dm = DeviceMatrix::fromHost(host); DeviceMatrix moved(std::move(dm)); VERIFY(dm.empty()); VERIFY(dm.data() == nullptr); VERIFY_IS_EQUAL(moved.rows(), rows); VERIFY_IS_EQUAL(moved.cols(), cols); MatrixType result = moved.toHost(); VERIFY_IS_APPROX(result, host); } // ---- Move assign ------------------------------------------------------------ template void test_move_assign(Index rows, Index cols) { using MatrixType = Matrix; MatrixType host = MatrixType::Random(rows, cols); auto dm = DeviceMatrix::fromHost(host); DeviceMatrix dest; dest = std::move(dm); VERIFY(dm.empty()); VERIFY_IS_EQUAL(dest.rows(), rows); MatrixType result = dest.toHost(); VERIFY_IS_APPROX(result, host); } // ---- resize() --------------------------------------------------------------- void test_resize() { DeviceMatrix dm(10, 20); VERIFY_IS_EQUAL(dm.rows(), 10); VERIFY_IS_EQUAL(dm.cols(), 20); dm.resize(50, 30); VERIFY_IS_EQUAL(dm.rows(), 50); VERIFY_IS_EQUAL(dm.cols(), 30); VERIFY(dm.data() != nullptr); // Resize to same dimensions is a no-op. double* ptr_before = dm.data(); dm.resize(50, 30); VERIFY(dm.data() == ptr_before); } // ---- Empty / 0x0 matrix ----------------------------------------------------- void test_empty() { using MatrixType = Matrix; MatrixType empty_mat(0, 0); auto dm = DeviceMatrix::fromHost(empty_mat); VERIFY(dm.empty()); VERIFY_IS_EQUAL(dm.rows(), 0); VERIFY_IS_EQUAL(dm.cols(), 0); MatrixType result = dm.toHost(); VERIFY_IS_EQUAL(result.rows(), 0); VERIFY_IS_EQUAL(result.cols(), 0); } // ---- Per-scalar driver ------------------------------------------------------ template void test_scalar() { // Square. CALL_SUBTEST(test_roundtrip(1, 1)); CALL_SUBTEST(test_roundtrip(64, 64)); CALL_SUBTEST(test_roundtrip(256, 256)); // Rectangular. CALL_SUBTEST(test_roundtrip(100, 7)); CALL_SUBTEST(test_roundtrip(7, 100)); // Async roundtrip. CALL_SUBTEST(test_roundtrip_async(64, 64)); CALL_SUBTEST(test_roundtrip_async(100, 7)); CALL_SUBTEST(test_clone(64, 64)); CALL_SUBTEST(test_move_construct(64, 64)); CALL_SUBTEST(test_move_assign(64, 64)); } EIGEN_DECLARE_TEST(gpu_device_matrix) { CALL_SUBTEST(test_default_construct()); CALL_SUBTEST(test_empty()); CALL_SUBTEST(test_resize()); CALL_SUBTEST(test_host_transfer_ready()); CALL_SUBTEST(test_host_transfer_move()); CALL_SUBTEST((test_allocate(100, 50))); CALL_SUBTEST((test_allocate(100, 50))); CALL_SUBTEST(test_scalar()); CALL_SUBTEST(test_scalar()); CALL_SUBTEST(test_scalar>()); CALL_SUBTEST(test_scalar>()); }