mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Pulled latest updates from upstream
This commit is contained in:
@@ -116,6 +116,8 @@ if(EIGEN_TEST_CXX11)
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
ei_add_test(cxx11_float16)
|
||||
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
||||
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
||||
ei_add_test(cxx11_meta)
|
||||
ei_add_test(cxx11_tensor_simple)
|
||||
# ei_add_test(cxx11_tensor_symmetry)
|
||||
|
||||
140
unsupported/test/cxx11_eventcount.cpp
Normal file
140
unsupported/test/cxx11_eventcount.cpp
Normal file
@@ -0,0 +1,140 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
|
||||
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#define EIGEN_USE_THREADS
|
||||
#include "main.h"
|
||||
#include <Eigen/CXX11/ThreadPool>
|
||||
|
||||
// Visual studio doesn't implement a rand_r() function since its
|
||||
// implementation of rand() is already thread safe
|
||||
int rand_reentrant(unsigned int* s) {
|
||||
#ifdef EIGEN_COMP_MSVC_STRICT
|
||||
EIGEN_UNUSED_VARIABLE(s);
|
||||
return rand();
|
||||
#else
|
||||
return rand_r(s);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void test_basic_eventcount()
|
||||
{
|
||||
std::vector<EventCount::Waiter> waiters(1);
|
||||
EventCount ec(waiters);
|
||||
EventCount::Waiter& w = waiters[0];
|
||||
ec.Notify(false);
|
||||
ec.Prewait(&w);
|
||||
ec.Notify(true);
|
||||
ec.CommitWait(&w);
|
||||
ec.Prewait(&w);
|
||||
ec.CancelWait(&w);
|
||||
}
|
||||
|
||||
// Fake bounded counter-based queue.
|
||||
struct TestQueue {
|
||||
std::atomic<int> val_;
|
||||
static const int kQueueSize = 10;
|
||||
|
||||
TestQueue() : val_() {}
|
||||
|
||||
~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); }
|
||||
|
||||
bool Push() {
|
||||
int val = val_.load(std::memory_order_relaxed);
|
||||
for (;;) {
|
||||
VERIFY_GE(val, 0);
|
||||
VERIFY_LE(val, kQueueSize);
|
||||
if (val == kQueueSize) return false;
|
||||
if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Pop() {
|
||||
int val = val_.load(std::memory_order_relaxed);
|
||||
for (;;) {
|
||||
VERIFY_GE(val, 0);
|
||||
VERIFY_LE(val, kQueueSize);
|
||||
if (val == 0) return false;
|
||||
if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Empty() { return val_.load(std::memory_order_relaxed) == 0; }
|
||||
};
|
||||
|
||||
const int TestQueue::kQueueSize;
|
||||
|
||||
// A number of producers send messages to a set of consumers using a set of
|
||||
// fake queues. Ensure that it does not crash, consumers don't deadlock and
|
||||
// number of blocked and unblocked threads match.
|
||||
static void test_stress_eventcount()
|
||||
{
|
||||
const int kThreads = std::thread::hardware_concurrency();
|
||||
static const int kEvents = 1 << 16;
|
||||
static const int kQueues = 10;
|
||||
|
||||
std::vector<EventCount::Waiter> waiters(kThreads);
|
||||
EventCount ec(waiters);
|
||||
TestQueue queues[kQueues];
|
||||
|
||||
std::vector<std::unique_ptr<std::thread>> producers;
|
||||
for (int i = 0; i < kThreads; i++) {
|
||||
producers.emplace_back(new std::thread([&ec, &queues]() {
|
||||
unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
|
||||
for (int j = 0; j < kEvents; j++) {
|
||||
unsigned idx = rand_reentrant(&rnd) % kQueues;
|
||||
if (queues[idx].Push()) {
|
||||
ec.Notify(false);
|
||||
continue;
|
||||
}
|
||||
EIGEN_THREAD_YIELD();
|
||||
j--;
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<std::thread>> consumers;
|
||||
for (int i = 0; i < kThreads; i++) {
|
||||
consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() {
|
||||
EventCount::Waiter& w = waiters[i];
|
||||
unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
|
||||
for (int j = 0; j < kEvents; j++) {
|
||||
unsigned idx = rand_reentrant(&rnd) % kQueues;
|
||||
if (queues[idx].Pop()) continue;
|
||||
j--;
|
||||
ec.Prewait(&w);
|
||||
bool empty = true;
|
||||
for (int q = 0; q < kQueues; q++) {
|
||||
if (!queues[q].Empty()) {
|
||||
empty = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!empty) {
|
||||
ec.CancelWait(&w);
|
||||
continue;
|
||||
}
|
||||
ec.CommitWait(&w);
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
for (int i = 0; i < kThreads; i++) {
|
||||
producers[i]->join();
|
||||
consumers[i]->join();
|
||||
}
|
||||
}
|
||||
|
||||
void test_cxx11_eventcount()
|
||||
{
|
||||
CALL_SUBTEST(test_basic_eventcount());
|
||||
CALL_SUBTEST(test_stress_eventcount());
|
||||
}
|
||||
@@ -122,6 +122,8 @@ void test_comparison()
|
||||
VERIFY(half(1.0f) != half(2.0f));
|
||||
|
||||
// Comparisons with NaNs and infinities.
|
||||
#if !EIGEN_COMP_MSVC
|
||||
// Visual Studio errors out on divisions by 0
|
||||
VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
|
||||
VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
|
||||
|
||||
@@ -132,13 +134,26 @@ void test_comparison()
|
||||
|
||||
VERIFY(half(1.0) < half(1.0 / 0.0));
|
||||
VERIFY(half(1.0) > half(-1.0 / 0.0));
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_functions()
|
||||
void test_basic_functions()
|
||||
{
|
||||
VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
|
||||
VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
|
||||
|
||||
VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f);
|
||||
VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f);
|
||||
|
||||
VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f);
|
||||
VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f);
|
||||
|
||||
VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f);
|
||||
VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f);
|
||||
|
||||
VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f);
|
||||
VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
|
||||
|
||||
VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
|
||||
VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
|
||||
|
||||
@@ -146,10 +161,32 @@ void test_functions()
|
||||
VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
|
||||
}
|
||||
|
||||
void test_trigonometric_functions()
|
||||
{
|
||||
VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f)));
|
||||
VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI)));
|
||||
//VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2)));
|
||||
//VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2)));
|
||||
VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f)));
|
||||
|
||||
VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f)));
|
||||
// VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI)));
|
||||
VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2)));
|
||||
VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2)));
|
||||
VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f)));
|
||||
|
||||
VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f)));
|
||||
// VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI)));
|
||||
// VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2)));
|
||||
//VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2)));
|
||||
VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f)));
|
||||
}
|
||||
|
||||
void test_cxx11_float16()
|
||||
{
|
||||
CALL_SUBTEST(test_conversion());
|
||||
CALL_SUBTEST(test_arithmetic());
|
||||
CALL_SUBTEST(test_comparison());
|
||||
CALL_SUBTEST(test_functions());
|
||||
CALL_SUBTEST(test_basic_functions());
|
||||
CALL_SUBTEST(test_trigonometric_functions());
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#include "main.h"
|
||||
|
||||
#include <array>
|
||||
#include <Eigen/CXX11/Core>
|
||||
#include <Eigen/CXX11/src/util/CXX11Meta.h>
|
||||
|
||||
using Eigen::internal::is_same;
|
||||
using Eigen::internal::type_list;
|
||||
|
||||
227
unsupported/test/cxx11_runqueue.cpp
Normal file
227
unsupported/test/cxx11_runqueue.cpp
Normal file
@@ -0,0 +1,227 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
|
||||
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#define EIGEN_USE_THREADS
|
||||
#include <cstdlib>
|
||||
#include "main.h"
|
||||
#include <Eigen/CXX11/ThreadPool>
|
||||
|
||||
|
||||
// Visual studio doesn't implement a rand_r() function since its
|
||||
// implementation of rand() is already thread safe
|
||||
int rand_reentrant(unsigned int* s) {
|
||||
#ifdef EIGEN_COMP_MSVC_STRICT
|
||||
EIGEN_UNUSED_VARIABLE(s);
|
||||
return rand();
|
||||
#else
|
||||
return rand_r(s);
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_basic_runqueue()
|
||||
{
|
||||
RunQueue<int, 4> q;
|
||||
// Check empty state.
|
||||
VERIFY(q.Empty());
|
||||
VERIFY_IS_EQUAL(0u, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PopFront());
|
||||
std::vector<int> stolen;
|
||||
VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
|
||||
VERIFY_IS_EQUAL(0u, stolen.size());
|
||||
// Push one front, pop one front.
|
||||
VERIFY_IS_EQUAL(0, q.PushFront(1));
|
||||
VERIFY_IS_EQUAL(1, q.Size());
|
||||
VERIFY_IS_EQUAL(1, q.PopFront());
|
||||
VERIFY_IS_EQUAL(0, q.Size());
|
||||
// Push front to overflow.
|
||||
VERIFY_IS_EQUAL(0, q.PushFront(2));
|
||||
VERIFY_IS_EQUAL(1, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PushFront(3));
|
||||
VERIFY_IS_EQUAL(2, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PushFront(4));
|
||||
VERIFY_IS_EQUAL(3, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PushFront(5));
|
||||
VERIFY_IS_EQUAL(4, q.Size());
|
||||
VERIFY_IS_EQUAL(6, q.PushFront(6));
|
||||
VERIFY_IS_EQUAL(4, q.Size());
|
||||
VERIFY_IS_EQUAL(5, q.PopFront());
|
||||
VERIFY_IS_EQUAL(3, q.Size());
|
||||
VERIFY_IS_EQUAL(4, q.PopFront());
|
||||
VERIFY_IS_EQUAL(2, q.Size());
|
||||
VERIFY_IS_EQUAL(3, q.PopFront());
|
||||
VERIFY_IS_EQUAL(1, q.Size());
|
||||
VERIFY_IS_EQUAL(2, q.PopFront());
|
||||
VERIFY_IS_EQUAL(0, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PopFront());
|
||||
// Push one back, pop one back.
|
||||
VERIFY_IS_EQUAL(0, q.PushBack(7));
|
||||
VERIFY_IS_EQUAL(1, q.Size());
|
||||
VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
|
||||
VERIFY_IS_EQUAL(1, stolen.size());
|
||||
VERIFY_IS_EQUAL(7, stolen[0]);
|
||||
VERIFY_IS_EQUAL(0, q.Size());
|
||||
stolen.clear();
|
||||
// Push back to overflow.
|
||||
VERIFY_IS_EQUAL(0, q.PushBack(8));
|
||||
VERIFY_IS_EQUAL(1, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PushBack(9));
|
||||
VERIFY_IS_EQUAL(2, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PushBack(10));
|
||||
VERIFY_IS_EQUAL(3, q.Size());
|
||||
VERIFY_IS_EQUAL(0, q.PushBack(11));
|
||||
VERIFY_IS_EQUAL(4, q.Size());
|
||||
VERIFY_IS_EQUAL(12, q.PushBack(12));
|
||||
VERIFY_IS_EQUAL(4, q.Size());
|
||||
// Pop back in halves.
|
||||
VERIFY_IS_EQUAL(2, q.PopBackHalf(&stolen));
|
||||
VERIFY_IS_EQUAL(2, stolen.size());
|
||||
VERIFY_IS_EQUAL(10, stolen[0]);
|
||||
VERIFY_IS_EQUAL(11, stolen[1]);
|
||||
VERIFY_IS_EQUAL(2, q.Size());
|
||||
stolen.clear();
|
||||
VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
|
||||
VERIFY_IS_EQUAL(1, stolen.size());
|
||||
VERIFY_IS_EQUAL(9, stolen[0]);
|
||||
VERIFY_IS_EQUAL(1, q.Size());
|
||||
stolen.clear();
|
||||
VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
|
||||
VERIFY_IS_EQUAL(1, stolen.size());
|
||||
VERIFY_IS_EQUAL(8, stolen[0]);
|
||||
stolen.clear();
|
||||
VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
|
||||
VERIFY_IS_EQUAL(0, stolen.size());
|
||||
// Empty again.
|
||||
VERIFY(q.Empty());
|
||||
VERIFY_IS_EQUAL(0, q.Size());
|
||||
}
|
||||
|
||||
// Empty tests that the queue is not claimed to be empty when is is in fact not.
|
||||
// Emptiness property is crucial part of thread pool blocking scheme,
|
||||
// so we go to great effort to ensure this property. We create a queue with
|
||||
// 1 element and then push 1 element (either front or back at random) and pop
|
||||
// 1 element (either front or back at random). So queue always contains at least
|
||||
// 1 element, but otherwise changes chaotically. Another thread constantly tests
|
||||
// that the queue is not claimed to be empty.
|
||||
void test_empty_runqueue()
|
||||
{
|
||||
RunQueue<int, 4> q;
|
||||
q.PushFront(1);
|
||||
std::atomic<bool> done(false);
|
||||
std::thread mutator([&q, &done]() {
|
||||
unsigned rnd = 0;
|
||||
std::vector<int> stolen;
|
||||
for (int i = 0; i < 1 << 18; i++) {
|
||||
if (rand_reentrant(&rnd) % 2)
|
||||
VERIFY_IS_EQUAL(0, q.PushFront(1));
|
||||
else
|
||||
VERIFY_IS_EQUAL(0, q.PushBack(1));
|
||||
if (rand_reentrant(&rnd) % 2)
|
||||
VERIFY_IS_EQUAL(1, q.PopFront());
|
||||
else {
|
||||
for (;;) {
|
||||
if (q.PopBackHalf(&stolen) == 1) {
|
||||
stolen.clear();
|
||||
break;
|
||||
}
|
||||
VERIFY_IS_EQUAL(0, stolen.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
done = true;
|
||||
});
|
||||
while (!done) {
|
||||
VERIFY(!q.Empty());
|
||||
int size = q.Size();
|
||||
VERIFY_GE(size, 1);
|
||||
VERIFY_LE(size, 2);
|
||||
}
|
||||
VERIFY_IS_EQUAL(1, q.PopFront());
|
||||
mutator.join();
|
||||
}
|
||||
|
||||
// Stress is a chaotic random test.
|
||||
// One thread (owner) calls PushFront/PopFront, other threads call PushBack/
|
||||
// PopBack. Ensure that we don't crash, deadlock, and all sanity checks pass.
|
||||
void test_stress_runqueue()
|
||||
{
|
||||
static const int kEvents = 1 << 18;
|
||||
RunQueue<int, 8> q;
|
||||
std::atomic<int> total(0);
|
||||
std::vector<std::unique_ptr<std::thread>> threads;
|
||||
threads.emplace_back(new std::thread([&q, &total]() {
|
||||
int sum = 0;
|
||||
int pushed = 1;
|
||||
int popped = 1;
|
||||
while (pushed < kEvents || popped < kEvents) {
|
||||
if (pushed < kEvents) {
|
||||
if (q.PushFront(pushed) == 0) {
|
||||
sum += pushed;
|
||||
pushed++;
|
||||
}
|
||||
}
|
||||
if (popped < kEvents) {
|
||||
int v = q.PopFront();
|
||||
if (v != 0) {
|
||||
sum -= v;
|
||||
popped++;
|
||||
}
|
||||
}
|
||||
}
|
||||
total += sum;
|
||||
}));
|
||||
for (int i = 0; i < 2; i++) {
|
||||
threads.emplace_back(new std::thread([&q, &total]() {
|
||||
int sum = 0;
|
||||
for (int j = 1; j < kEvents; j++) {
|
||||
if (q.PushBack(j) == 0) {
|
||||
sum += j;
|
||||
continue;
|
||||
}
|
||||
EIGEN_THREAD_YIELD();
|
||||
j--;
|
||||
}
|
||||
total += sum;
|
||||
}));
|
||||
threads.emplace_back(new std::thread([&q, &total]() {
|
||||
int sum = 0;
|
||||
std::vector<int> stolen;
|
||||
for (int j = 1; j < kEvents;) {
|
||||
if (q.PopBackHalf(&stolen) == 0) {
|
||||
EIGEN_THREAD_YIELD();
|
||||
continue;
|
||||
}
|
||||
while (stolen.size() && j < kEvents) {
|
||||
int v = stolen.back();
|
||||
stolen.pop_back();
|
||||
VERIFY_IS_NOT_EQUAL(v, 0);
|
||||
sum += v;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
while (stolen.size()) {
|
||||
int v = stolen.back();
|
||||
stolen.pop_back();
|
||||
VERIFY_IS_NOT_EQUAL(v, 0);
|
||||
while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD();
|
||||
}
|
||||
total -= sum;
|
||||
}));
|
||||
}
|
||||
for (size_t i = 0; i < threads.size(); i++) threads[i]->join();
|
||||
VERIFY(q.Empty());
|
||||
VERIFY(total.load() == 0);
|
||||
}
|
||||
|
||||
void test_cxx11_runqueue()
|
||||
{
|
||||
CALL_SUBTEST_1(test_basic_runqueue());
|
||||
CALL_SUBTEST_2(test_empty_runqueue());
|
||||
CALL_SUBTEST_3(test_stress_runqueue());
|
||||
}
|
||||
@@ -661,6 +661,9 @@ void test_cuda_digamma()
|
||||
for (int i = 5; i < 7; ++i) {
|
||||
VERIFY_IS_EQUAL(out(i), expected_out(i));
|
||||
}
|
||||
|
||||
cudaFree(d_in);
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -718,13 +721,17 @@ void test_cuda_zeta()
|
||||
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||
|
||||
VERIFY_IS_EQUAL(out(0), expected_out(0));
|
||||
VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3));
|
||||
VERIFY((std::isnan)(out(3)));
|
||||
|
||||
for (int i = 1; i < 6; ++i) {
|
||||
if (i != 3) {
|
||||
VERIFY_IS_APPROX(out(i), expected_out(i));
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(d_in_x);
|
||||
cudaFree(d_in_q);
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -787,6 +794,10 @@ void test_cuda_polygamma()
|
||||
for (int i = 0; i < 7; ++i) {
|
||||
VERIFY_IS_APPROX(out(i), expected_out(i));
|
||||
}
|
||||
|
||||
cudaFree(d_in_x);
|
||||
cudaFree(d_in_n);
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -826,9 +837,9 @@ void test_cuda_igamma()
|
||||
Scalar* d_a;
|
||||
Scalar* d_x;
|
||||
Scalar* d_out;
|
||||
cudaMalloc((void**)(&d_a), bytes);
|
||||
cudaMalloc((void**)(&d_x), bytes);
|
||||
cudaMalloc((void**)(&d_out), bytes);
|
||||
assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess);
|
||||
assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess);
|
||||
assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
|
||||
|
||||
cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
|
||||
@@ -854,6 +865,10 @@ void test_cuda_igamma()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(d_a);
|
||||
cudaFree(d_x);
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -920,6 +935,10 @@ void test_cuda_igammac()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(d_a);
|
||||
cudaFree(d_x);
|
||||
cudaFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -935,8 +954,8 @@ void test_cuda_erf(const Scalar stddev)
|
||||
|
||||
Scalar* d_in;
|
||||
Scalar* d_out;
|
||||
cudaMalloc((void**)(&d_in), bytes);
|
||||
cudaMalloc((void**)(&d_out), bytes);
|
||||
assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess);
|
||||
assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
|
||||
|
||||
cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ static void test_0d()
|
||||
TensorFixedSize<float, Sizes<> > scalar1;
|
||||
TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
|
||||
VERIFY_IS_EQUAL(scalar1.rank(), 0);
|
||||
VERIFY_IS_EQUAL(scalar1.size(), 1);
|
||||
VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1);
|
||||
|
||||
scalar1() = 7.0;
|
||||
scalar2() = 13.0;
|
||||
|
||||
@@ -16,7 +16,7 @@ using Eigen::RowMajor;
|
||||
|
||||
static void test_tanh()
|
||||
{
|
||||
Tensor<float, 1> vec1({6});
|
||||
Tensor<float, 1> vec1(6);
|
||||
vec1.setRandom();
|
||||
|
||||
Tensor<float, 1> vec2 = vec1.tanh();
|
||||
@@ -28,7 +28,7 @@ static void test_tanh()
|
||||
|
||||
static void test_sigmoid()
|
||||
{
|
||||
Tensor<float, 1> vec1({6});
|
||||
Tensor<float, 1> vec1(6);
|
||||
vec1.setRandom();
|
||||
|
||||
Tensor<float, 1> vec2 = vec1.sigmoid();
|
||||
|
||||
@@ -14,8 +14,8 @@
|
||||
|
||||
static void test_simple()
|
||||
{
|
||||
Tensor<float, 1, ColMajor> vec1({6});
|
||||
Tensor<float, 1, ColMajor, int> vec2({6});
|
||||
Tensor<float, 1, ColMajor> vec1(6);
|
||||
Tensor<float, 1, ColMajor, int> vec2(6);
|
||||
|
||||
vec1(0) = 4.0; vec2(0) = 0.0;
|
||||
vec1(1) = 8.0; vec2(1) = 1.0;
|
||||
|
||||
@@ -228,6 +228,42 @@ void test_cuda_reductions() {
|
||||
gpu_device.deallocate(d_res_float);
|
||||
}
|
||||
|
||||
void test_cuda_forced_evals() {
|
||||
|
||||
Eigen::CudaStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
int num_elem = 101;
|
||||
|
||||
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
|
||||
d_float, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
|
||||
d_res_half, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
|
||||
d_res_float, num_elem);
|
||||
|
||||
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
|
||||
gpu_res_float.device(gpu_device) = gpu_float.abs();
|
||||
gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
|
||||
|
||||
Tensor<float, 1> half_prec(num_elem);
|
||||
Tensor<float, 1> full_prec(num_elem);
|
||||
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
|
||||
gpu_device.synchronize();
|
||||
|
||||
for (int i = 0; i < num_elem; ++i) {
|
||||
std::cout << "Checking unary " << i << std::endl;
|
||||
VERIFY_IS_APPROX(full_prec(i), half_prec(i));
|
||||
}
|
||||
|
||||
gpu_device.deallocate(d_float);
|
||||
gpu_device.deallocate(d_res_half);
|
||||
gpu_device.deallocate(d_res_float);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -246,6 +282,7 @@ void test_cxx11_tensor_of_float16_cuda()
|
||||
CALL_SUBTEST_1(test_cuda_elementwise());
|
||||
CALL_SUBTEST_2(test_cuda_contractions());
|
||||
CALL_SUBTEST_3(test_cuda_reductions());
|
||||
CALL_SUBTEST_4(test_cuda_forced_evals());
|
||||
}
|
||||
else {
|
||||
std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl;
|
||||
|
||||
@@ -792,7 +792,9 @@ void testNistMGH10(void)
|
||||
MGH10_functor functor;
|
||||
LevenbergMarquardt<MGH10_functor> lm(functor);
|
||||
info = lm.minimize(x);
|
||||
++g_test_level;
|
||||
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
|
||||
--g_test_level;
|
||||
// was: VERIFY_IS_EQUAL(info, 1);
|
||||
|
||||
// check norm^2
|
||||
|
||||
Reference in New Issue
Block a user