From 395c835f4b11fa70bf8eaaffab1177e6120585d3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 22 Jul 2016 15:30:24 +0200 Subject: [PATCH 1/5] Fix CUDA compilation --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index dcb948c5a..8931f4662 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -262,7 +262,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(c #else float a1 = __low2float(a); float a2 = __high2float(a); - return Eigen::half(internal::raw_uint16_to_half(__float2half_rn(a1 * a2))); + return Eigen::half(half_impl::raw_uint16_to_half(__float2half_rn(a1 * a2))); #endif } From 24af67a6cc102f8e5dde881608880b6b5264e336 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 22 Jul 2016 15:30:54 +0200 Subject: [PATCH 2/5] Fix boostmultiprec for C++03 --- test/boostmultiprec.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/boostmultiprec.cpp b/test/boostmultiprec.cpp index a83ae4b78..3e16aeabd 100644 --- a/test/boostmultiprec.cpp +++ b/test/boostmultiprec.cpp @@ -71,6 +71,17 @@ namespace Eigen { template<> Real test_precision<Real>() { return 1e-50; } + + namespace internal { + template<typename NewType> + struct cast_impl<Real,NewType> + { + static inline NewType run(const Real& x) + { + return x.template convert_to<NewType>(); + } + }; + } } namespace boost { From 7acf23c14c7827fa044c7010ce7bcc7802179a12 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 22 Jul 2016 15:41:23 +0200 Subject: [PATCH 3/5] Truly split unit test.
--- .../test/cxx11_tensor_of_float16_cuda.cu | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 34e9f54a0..fe2e58929 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -21,6 +21,7 @@ using Eigen::Tensor; #ifdef EIGEN_HAS_CUDA_FP16 +template<typename> void test_cuda_conversion() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -55,7 +56,7 @@ void test_cuda_conversion() { gpu_device.deallocate(d_conv); } - +template<typename> void test_cuda_unary() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -92,7 +93,7 @@ void test_cuda_unary() { gpu_device.deallocate(d_res_float); } - +template<typename> void test_cuda_elementwise() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -134,6 +135,7 @@ void test_cuda_elementwise() { gpu_device.deallocate(d_res_float); } +template<typename> void test_cuda_trancendental() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -196,7 +198,7 @@ void test_cuda_trancendental() { gpu_device.deallocate(d_res2_float); } - +template<typename> void test_cuda_contractions() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -247,7 +249,7 @@ void test_cuda_contractions() { gpu_device.deallocate(d_res_float); } - +template<typename> void test_cuda_reductions(int size1, int size2, int redux) { std::cout << "Reducing " << size1 << " by " << size2 @@ -296,17 +298,19 @@ void test_cuda_reductions(int size1, int size2, int redux) { gpu_device.deallocate(d_res_float); } +template<typename> void test_cuda_reductions() { - test_cuda_reductions(13, 13, 0); - test_cuda_reductions(13, 13, 1); + test_cuda_reductions<void>(13, 13, 0); + test_cuda_reductions<void>(13, 13, 1); - test_cuda_reductions(35, 36, 0); - test_cuda_reductions(35, 36, 1); + test_cuda_reductions<void>(35, 36, 0); + test_cuda_reductions<void>(35, 36, 1);
- test_cuda_reductions(36, 35, 0); - test_cuda_reductions(36, 35, 1); + test_cuda_reductions<void>(36, 35, 0); + test_cuda_reductions<void>(36, 35, 1); } +template<typename> void test_cuda_full_reductions() { Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); @@ -355,7 +359,7 @@ void test_cuda_full_reductions() { gpu_device.deallocate(d_res_float); } - +template<typename> void test_cuda_forced_evals() { Eigen::CudaStreamDevice stream; @@ -409,14 +413,14 @@ void test_cuda_forced_evals() { void test_cxx11_tensor_of_float16_cuda() { #ifdef EIGEN_HAS_CUDA_FP16 - CALL_SUBTEST_1(test_cuda_conversion()); - CALL_SUBTEST_1(test_cuda_unary()); - CALL_SUBTEST_1(test_cuda_elementwise()); - CALL_SUBTEST_1(test_cuda_trancendental()); - CALL_SUBTEST_2(test_cuda_contractions()); - CALL_SUBTEST_3(test_cuda_reductions()); - CALL_SUBTEST_4(test_cuda_full_reductions()); - CALL_SUBTEST_5(test_cuda_forced_evals()); + CALL_SUBTEST_1(test_cuda_conversion<void>()); + CALL_SUBTEST_1(test_cuda_unary<void>()); + CALL_SUBTEST_1(test_cuda_elementwise<void>()); + CALL_SUBTEST_1(test_cuda_trancendental<void>()); + CALL_SUBTEST_2(test_cuda_contractions<void>()); + CALL_SUBTEST_3(test_cuda_reductions<void>()); + CALL_SUBTEST_4(test_cuda_full_reductions<void>()); + CALL_SUBTEST_5(test_cuda_forced_evals<void>()); #else std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl; #endif From d7a0e52478e6f769ce2df51f009d0b3ca535b80b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 22 Jul 2016 15:44:26 +0200 Subject: [PATCH 4/5] Fix testing of log nearby 1 --- unsupported/test/cxx11_tensor_of_float16_cuda.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index fe2e58929..b6df5a4d2 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -188,7 +188,10 @@ void test_cuda_trancendental() { } for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl; - VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); + if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy near 1 + VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f)); + else + VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); } gpu_device.deallocate(d_float1); gpu_device.deallocate(d_float2); From 60d5980a41e3c96f801604bc781b7879fcbece42 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 22 Jul 2016 15:46:23 +0200 Subject: [PATCH 5/5] add a note --- test/boostmultiprec.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/boostmultiprec.cpp b/test/boostmultiprec.cpp index 3e16aeabd..b4d0decff 100644 --- a/test/boostmultiprec.cpp +++ b/test/boostmultiprec.cpp @@ -72,6 +72,7 @@ namespace Eigen { template<> Real test_precision<Real>() { return 1e-50; } + // needed in C++03 mode where number does not support explicit cast. namespace internal { template<typename NewType> struct cast_impl<Real,NewType>