mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Merged eigen/eigen into default
This commit is contained in:
@@ -110,35 +110,48 @@ ei_add_test(minres)
|
||||
ei_add_test(levenberg_marquardt)
|
||||
ei_add_test(kronecker_product)
|
||||
|
||||
# TODO: The following test names are prefixed with the cxx11 string, since historically
|
||||
# the tests depended on c++11. This isn't the case anymore so we ought to rename them.
|
||||
ei_add_test(cxx11_float16)
|
||||
ei_add_test(cxx11_tensor_dimension)
|
||||
ei_add_test(cxx11_tensor_map)
|
||||
ei_add_test(cxx11_tensor_assign)
|
||||
ei_add_test(cxx11_tensor_comparisons)
|
||||
ei_add_test(cxx11_tensor_forced_eval)
|
||||
ei_add_test(cxx11_tensor_math)
|
||||
ei_add_test(cxx11_tensor_const)
|
||||
ei_add_test(cxx11_tensor_intdiv)
|
||||
ei_add_test(cxx11_tensor_casts)
|
||||
ei_add_test(cxx11_tensor_empty)
|
||||
ei_add_test(cxx11_tensor_sugar)
|
||||
ei_add_test(cxx11_tensor_roundings)
|
||||
ei_add_test(cxx11_tensor_layout_swap)
|
||||
ei_add_test(cxx11_tensor_io)
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
# This test requires __uint128_t which is only available on 64bit systems
|
||||
ei_add_test(cxx11_tensor_uint128)
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_CXX11)
|
||||
# It should be safe to always run these tests as there is some fallback code for
|
||||
# older compilers that don't support cxx11.
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
ei_add_test(cxx11_float16)
|
||||
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
||||
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
||||
ei_add_test(cxx11_meta)
|
||||
ei_add_test(cxx11_tensor_simple)
|
||||
# ei_add_test(cxx11_tensor_symmetry)
|
||||
ei_add_test(cxx11_tensor_assign)
|
||||
ei_add_test(cxx11_tensor_dimension)
|
||||
ei_add_test(cxx11_tensor_index_list)
|
||||
ei_add_test(cxx11_tensor_mixed_indices)
|
||||
ei_add_test(cxx11_tensor_comparisons)
|
||||
ei_add_test(cxx11_tensor_contraction)
|
||||
ei_add_test(cxx11_tensor_convolution)
|
||||
ei_add_test(cxx11_tensor_expr)
|
||||
ei_add_test(cxx11_tensor_math)
|
||||
ei_add_test(cxx11_tensor_forced_eval)
|
||||
ei_add_test(cxx11_tensor_fixed_size)
|
||||
ei_add_test(cxx11_tensor_const)
|
||||
ei_add_test(cxx11_tensor_of_const_values)
|
||||
ei_add_test(cxx11_tensor_of_complex)
|
||||
ei_add_test(cxx11_tensor_of_strings)
|
||||
ei_add_test(cxx11_tensor_intdiv)
|
||||
ei_add_test(cxx11_tensor_lvalue)
|
||||
ei_add_test(cxx11_tensor_map)
|
||||
ei_add_test(cxx11_tensor_broadcasting)
|
||||
ei_add_test(cxx11_tensor_chipping)
|
||||
ei_add_test(cxx11_tensor_concatenation)
|
||||
@@ -156,23 +169,11 @@ if(EIGEN_TEST_CXX11)
|
||||
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
||||
ei_add_test(cxx11_tensor_ref)
|
||||
ei_add_test(cxx11_tensor_random)
|
||||
ei_add_test(cxx11_tensor_casts)
|
||||
ei_add_test(cxx11_tensor_roundings)
|
||||
ei_add_test(cxx11_tensor_reverse)
|
||||
ei_add_test(cxx11_tensor_layout_swap)
|
||||
ei_add_test(cxx11_tensor_io)
|
||||
ei_add_test(cxx11_tensor_generator)
|
||||
ei_add_test(cxx11_tensor_custom_op)
|
||||
ei_add_test(cxx11_tensor_custom_index)
|
||||
ei_add_test(cxx11_tensor_sugar)
|
||||
ei_add_test(cxx11_tensor_fft)
|
||||
ei_add_test(cxx11_tensor_ifft)
|
||||
ei_add_test(cxx11_tensor_empty)
|
||||
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
# This test requires __uint128_t which is only available on 64bit systems
|
||||
ei_add_test(cxx11_tensor_uint128)
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
|
||||
@@ -31,9 +31,9 @@ void test_conversion()
|
||||
VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
|
||||
|
||||
// Verify round-to-nearest-even behavior.
|
||||
float val1 = float(half(__half{0x3c00}));
|
||||
float val2 = float(half(__half{0x3c01}));
|
||||
float val3 = float(half(__half{0x3c02}));
|
||||
float val1 = float(half(__half(0x3c00)));
|
||||
float val2 = float(half(__half(0x3c01)));
|
||||
float val3 = float(half(__half(0x3c02)));
|
||||
VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00);
|
||||
VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02);
|
||||
|
||||
@@ -49,21 +49,21 @@ void test_conversion()
|
||||
VERIFY_IS_EQUAL(half(true).x, 0x3c00);
|
||||
|
||||
// Conversion to float.
|
||||
VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f);
|
||||
VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f);
|
||||
VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f);
|
||||
VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f);
|
||||
|
||||
// Denormals.
|
||||
VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f);
|
||||
VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f);
|
||||
VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f);
|
||||
VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f);
|
||||
VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f);
|
||||
VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f);
|
||||
|
||||
// NaNs and infinities.
|
||||
VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
|
||||
VERIFY(!(numext::isnan)(float(half(0.0f))));
|
||||
VERIFY((numext::isinf)(float(half(__half{0xfc00}))));
|
||||
VERIFY((numext::isnan)(float(half(__half{0xfc01}))));
|
||||
VERIFY((numext::isinf)(float(half(__half{0x7c00}))));
|
||||
VERIFY((numext::isnan)(float(half(__half{0x7c01}))));
|
||||
VERIFY((numext::isinf)(float(half(__half(0xfc00)))));
|
||||
VERIFY((numext::isnan)(float(half(__half(0xfc01)))));
|
||||
VERIFY((numext::isinf)(float(half(__half(0x7c00)))));
|
||||
VERIFY((numext::isnan)(float(half(__half(0x7c01)))));
|
||||
|
||||
#if !EIGEN_COMP_MSVC
|
||||
// Visual Studio errors out on divisions by 0
|
||||
@@ -73,12 +73,12 @@ void test_conversion()
|
||||
#endif
|
||||
|
||||
// Exactly same checks as above, just directly on the half representation.
|
||||
VERIFY(!(numext::isinf)(half(__half{0x7bff})));
|
||||
VERIFY(!(numext::isnan)(half(__half{0x0000})));
|
||||
VERIFY((numext::isinf)(half(__half{0xfc00})));
|
||||
VERIFY((numext::isnan)(half(__half{0xfc01})));
|
||||
VERIFY((numext::isinf)(half(__half{0x7c00})));
|
||||
VERIFY((numext::isnan)(half(__half{0x7c01})));
|
||||
VERIFY(!(numext::isinf)(half(__half(0x7bff))));
|
||||
VERIFY(!(numext::isnan)(half(__half(0x0000))));
|
||||
VERIFY((numext::isinf)(half(__half(0xfc00))));
|
||||
VERIFY((numext::isnan)(half(__half(0xfc01))));
|
||||
VERIFY((numext::isinf)(half(__half(0x7c00))));
|
||||
VERIFY((numext::isnan)(half(__half(0x7c01))));
|
||||
|
||||
#if !EIGEN_COMP_MSVC
|
||||
// Visual Studio errors out on divisions by 0
|
||||
|
||||
@@ -64,7 +64,7 @@ static void test_argmax_tuple_reducer()
|
||||
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
|
||||
DimensionList<DenseIndex, 4> dims;
|
||||
reduced = index_tuples.reduce(
|
||||
dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>());
|
||||
dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
|
||||
|
||||
Tensor<float, 0, DataLayout> maxi = tensor.maximum();
|
||||
|
||||
@@ -74,7 +74,7 @@ static void test_argmax_tuple_reducer()
|
||||
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
|
||||
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
|
||||
reduced_by_dims = index_tuples.reduce(
|
||||
reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>());
|
||||
reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
|
||||
|
||||
Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);
|
||||
|
||||
@@ -96,7 +96,7 @@ static void test_argmin_tuple_reducer()
|
||||
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
|
||||
DimensionList<DenseIndex, 4> dims;
|
||||
reduced = index_tuples.reduce(
|
||||
dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>());
|
||||
dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
|
||||
|
||||
Tensor<float, 0, DataLayout> mini = tensor.minimum();
|
||||
|
||||
@@ -106,7 +106,7 @@ static void test_argmin_tuple_reducer()
|
||||
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
|
||||
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
|
||||
reduced_by_dims = index_tuples.reduce(
|
||||
reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>());
|
||||
reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
|
||||
|
||||
Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);
|
||||
|
||||
|
||||
@@ -37,7 +37,6 @@ static void test_fixed_size()
|
||||
VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7);
|
||||
}
|
||||
|
||||
|
||||
static void test_match()
|
||||
{
|
||||
Eigen::DSizes<int, 3> dyn(2,3,7);
|
||||
@@ -49,10 +48,22 @@ static void test_match()
|
||||
VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false);
|
||||
}
|
||||
|
||||
// Checks the rank-0 (scalar) case of both dimension containers: an empty
// compile-time Eigen::Sizes<> and a zero-rank Eigen::DSizes<ptrdiff_t, 0>
// must each report a total size of 1 and a rank of 0.
static void test_rank_zero()
|
||||
{
|
||||
// Compile-time sized variant with no dimensions at all.
Eigen::Sizes<> scalar;
|
||||
VERIFY_IS_EQUAL(scalar.TotalSize(), 1);
|
||||
VERIFY_IS_EQUAL(scalar.rank(), 0);
|
||||
// The product taken over the empty dimension list is expected to be 1 too.
VERIFY_IS_EQUAL(internal::array_prod(scalar), 1);
|
||||
|
||||
// Dynamically sized variant with zero dimensions.
Eigen::DSizes<ptrdiff_t, 0> dscalar;
|
||||
VERIFY_IS_EQUAL(dscalar.TotalSize(), 1);
|
||||
VERIFY_IS_EQUAL(dscalar.rank(), 0);
|
||||
}
|
||||
|
||||
// Test entry point for the tensor dimension tests: runs each of the four
// subtests below through CALL_SUBTEST, including the rank-zero check.
void test_cxx11_tensor_dimension()
|
||||
{
|
||||
CALL_SUBTEST(test_dynamic_size());
|
||||
CALL_SUBTEST(test_fixed_size());
|
||||
CALL_SUBTEST(test_match());
|
||||
CALL_SUBTEST(test_rank_zero());
|
||||
}
|
||||
|
||||
@@ -24,10 +24,10 @@ static void test_empty_tensor()
|
||||
|
||||
static void test_empty_fixed_size_tensor()
|
||||
{
|
||||
TensorFixedSize<float, Sizes<0>> source;
|
||||
TensorFixedSize<float, Sizes<0>> tgt1 = source;
|
||||
TensorFixedSize<float, Sizes<0>> tgt2(source);
|
||||
TensorFixedSize<float, Sizes<0>> tgt3;
|
||||
TensorFixedSize<float, Sizes<0> > source;
|
||||
TensorFixedSize<float, Sizes<0> > tgt1 = source;
|
||||
TensorFixedSize<float, Sizes<0> > tgt2(source);
|
||||
TensorFixedSize<float, Sizes<0> > tgt3;
|
||||
tgt3 = tgt1;
|
||||
tgt3 = tgt2;
|
||||
}
|
||||
|
||||
@@ -130,9 +130,9 @@ static void test_tensor_map()
|
||||
static void test_2d()
|
||||
{
|
||||
float data1[6];
|
||||
TensorMap<TensorFixedSize<float, Sizes<2, 3> >> mat1(data1,2,3);
|
||||
TensorMap<TensorFixedSize<float, Sizes<2, 3> > > mat1(data1,2,3);
|
||||
float data2[6];
|
||||
TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor>> mat2(data2,2,3);
|
||||
TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor> > mat2(data2,2,3);
|
||||
|
||||
VERIFY_IS_EQUAL((mat1.size()), 2*3);
|
||||
VERIFY_IS_EQUAL(mat1.rank(), 2);
|
||||
@@ -153,7 +153,7 @@ static void test_2d()
|
||||
mat2(1,1) = -4.0;
|
||||
mat2(1,2) = -5.0;
|
||||
|
||||
TensorFixedSize<float, Sizes<2, 3>> mat3;
|
||||
TensorFixedSize<float, Sizes<2, 3> > mat3;
|
||||
TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4;
|
||||
mat3 = mat1.abs();
|
||||
mat4 = mat2.abs();
|
||||
|
||||
@@ -22,14 +22,15 @@ static void test_simple()
|
||||
m1.setRandom();
|
||||
m2.setRandom();
|
||||
|
||||
TensorMap<Tensor<float, 2>> mat1(m1.data(), 3,3);
|
||||
TensorMap<Tensor<float, 2>> mat2(m2.data(), 3,3);
|
||||
TensorMap<Tensor<float, 2> > mat1(m1.data(), 3,3);
|
||||
TensorMap<Tensor<float, 2> > mat2(m2.data(), 3,3);
|
||||
|
||||
Tensor<float, 2> mat3(3,3);
|
||||
mat3 = mat1;
|
||||
|
||||
typedef Tensor<float, 1>::DimensionPair DimPair;
|
||||
Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
|
||||
Eigen::array<DimPair, 1> dims;
|
||||
dims[0] = DimPair(1, 0);
|
||||
|
||||
mat3 = mat3.contract(mat2, dims).eval();
|
||||
|
||||
@@ -60,7 +61,7 @@ static void test_const()
|
||||
Eigen::array<int, 2> bcast;
|
||||
bcast[0] = 3;
|
||||
bcast[1] = 1;
|
||||
const TensorMap<Tensor<const float, 2>> input_tensor(input.data(), 3, 3);
|
||||
const TensorMap<Tensor<const float, 2> > input_tensor(input.data(), 3, 3);
|
||||
Tensor<float, 2> output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
|
||||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
|
||||
@@ -19,8 +19,8 @@ static void test_0d()
|
||||
Tensor<int, 0> scalar1;
|
||||
Tensor<int, 0, RowMajor> scalar2;
|
||||
|
||||
TensorMap<Tensor<const int, 0>> scalar3(scalar1.data());
|
||||
TensorMap<Tensor<const int, 0, RowMajor>> scalar4(scalar2.data());
|
||||
TensorMap<Tensor<const int, 0> > scalar3(scalar1.data());
|
||||
TensorMap<Tensor<const int, 0, RowMajor> > scalar4(scalar2.data());
|
||||
|
||||
scalar1() = 7;
|
||||
scalar2() = 13;
|
||||
@@ -37,8 +37,8 @@ static void test_1d()
|
||||
Tensor<int, 1> vec1(6);
|
||||
Tensor<int, 1, RowMajor> vec2(6);
|
||||
|
||||
TensorMap<Tensor<const int, 1>> vec3(vec1.data(), 6);
|
||||
TensorMap<Tensor<const int, 1, RowMajor>> vec4(vec2.data(), 6);
|
||||
TensorMap<Tensor<const int, 1> > vec3(vec1.data(), 6);
|
||||
TensorMap<Tensor<const int, 1, RowMajor> > vec4(vec2.data(), 6);
|
||||
|
||||
vec1(0) = 4; vec2(0) = 0;
|
||||
vec1(1) = 8; vec2(1) = 1;
|
||||
@@ -85,8 +85,8 @@ static void test_2d()
|
||||
mat2(1,1) = 4;
|
||||
mat2(1,2) = 5;
|
||||
|
||||
TensorMap<Tensor<const int, 2>> mat3(mat1.data(), 2, 3);
|
||||
TensorMap<Tensor<const int, 2, RowMajor>> mat4(mat2.data(), 2, 3);
|
||||
TensorMap<Tensor<const int, 2> > mat3(mat1.data(), 2, 3);
|
||||
TensorMap<Tensor<const int, 2, RowMajor> > mat4(mat2.data(), 2, 3);
|
||||
|
||||
VERIFY_IS_EQUAL(mat3.rank(), 2);
|
||||
VERIFY_IS_EQUAL(mat3.size(), 6);
|
||||
@@ -129,8 +129,8 @@ static void test_3d()
|
||||
}
|
||||
}
|
||||
|
||||
TensorMap<Tensor<const int, 3>> mat3(mat1.data(), 2, 3, 7);
|
||||
TensorMap<Tensor<const int, 3, RowMajor>> mat4(mat2.data(), array<DenseIndex, 3>{{2, 3, 7}});
|
||||
TensorMap<Tensor<const int, 3> > mat3(mat1.data(), 2, 3, 7);
|
||||
TensorMap<Tensor<const int, 3, RowMajor> > mat4(mat2.data(), 2, 3, 7);
|
||||
|
||||
VERIFY_IS_EQUAL(mat3.rank(), 3);
|
||||
VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
|
||||
@@ -173,8 +173,8 @@ static void test_from_tensor()
|
||||
}
|
||||
}
|
||||
|
||||
TensorMap<Tensor<int, 3>> mat3(mat1);
|
||||
TensorMap<Tensor<int, 3, RowMajor>> mat4(mat2);
|
||||
TensorMap<Tensor<int, 3> > mat3(mat1);
|
||||
TensorMap<Tensor<int, 3, RowMajor> > mat4(mat2);
|
||||
|
||||
VERIFY_IS_EQUAL(mat3.rank(), 3);
|
||||
VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
|
||||
@@ -199,19 +199,23 @@ static void test_from_tensor()
|
||||
}
|
||||
}
|
||||
|
||||
TensorFixedSize<int, Sizes<2,3,7>> mat5;
|
||||
TensorFixedSize<int, Sizes<2,3,7> > mat5;
|
||||
|
||||
val = 0;
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
for (int k = 0; k < 7; ++k) {
|
||||
mat5(i,j,k) = val;
|
||||
array<ptrdiff_t, 3> coords;
|
||||
coords[0] = i;
|
||||
coords[1] = j;
|
||||
coords[2] = k;
|
||||
mat5(coords) = val;
|
||||
val++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TensorMap<TensorFixedSize<int, Sizes<2,3,7>>> mat6(mat5);
|
||||
TensorMap<TensorFixedSize<int, Sizes<2,3,7> > > mat6(mat5);
|
||||
|
||||
VERIFY_IS_EQUAL(mat6.rank(), 3);
|
||||
VERIFY_IS_EQUAL(mat6.size(), 2*3*7);
|
||||
@@ -233,8 +237,8 @@ static void test_from_tensor()
|
||||
|
||||
static int f(const TensorMap<Tensor<int, 3> >& tensor) {
|
||||
// Size<0> empty;
|
||||
EIGEN_STATIC_ASSERT((internal::array_size<Sizes<>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
Tensor<int, 0> result = tensor.sum();
|
||||
return result();
|
||||
}
|
||||
@@ -253,7 +257,7 @@ static void test_casting()
|
||||
}
|
||||
}
|
||||
|
||||
TensorMap<Tensor<int, 3>> map(tensor);
|
||||
TensorMap<Tensor<int, 3> > map(tensor);
|
||||
int sum1 = f(map);
|
||||
int sum2 = f(tensor);
|
||||
|
||||
|
||||
@@ -134,6 +134,68 @@ void test_cuda_elementwise() {
|
||||
gpu_device.deallocate(d_res_float);
|
||||
}
|
||||
|
||||
// Compares exp() and log() evaluated in float against the same functions
// evaluated after casting the input to Eigen::half, on a GPU device.
// Two random float inputs are generated on the device; for each, the float
// result and the half result (cast back to float) are copied to the host and
// checked element by element with VERIFY_IS_APPROX.
// NOTE(review): the function name is misspelled ("trancendental"); it is kept
// as is because the test entry point calls it by this name.
void test_cuda_trancendental() {
|
||||
Eigen::CudaStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
int num_elem = 101;
|
||||
|
||||
// Six device buffers of num_elem floats: two inputs and four results.
// The *_half buffers are float storage as well, since the half-precision
// results are cast back to float before being stored.
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
float* d_res1_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
float* d_res1_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
float* d_res2_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
float* d_res2_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
|
||||
|
||||
// Rank-1 tensor views over the raw device buffers.
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
|
||||
d_float1, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
|
||||
d_float2, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_half(
|
||||
d_res1_half, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_float(
|
||||
d_res1_float, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_half(
|
||||
d_res2_half, num_elem);
|
||||
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_float(
|
||||
d_res2_float, num_elem);
|
||||
|
||||
// Random inputs, then the reference results computed directly in float.
gpu_float1.device(gpu_device) = gpu_float1.random();
|
||||
gpu_float2.device(gpu_device) = gpu_float2.random();
|
||||
gpu_res1_float.device(gpu_device) = gpu_float1.exp();
|
||||
gpu_res2_float.device(gpu_device) = gpu_float2.log();
|
||||
// Same functions, but evaluated on the input cast to Eigen::half and then
// cast back to float so both result sets share one element type.
gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp().cast<float>();
|
||||
gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log().cast<float>();
|
||||
|
||||
// Host-side copies of the inputs and of both result sets.
Tensor<float, 1> input1(num_elem);
|
||||
Tensor<float, 1> half_prec1(num_elem);
|
||||
Tensor<float, 1> full_prec1(num_elem);
|
||||
Tensor<float, 1> input2(num_elem);
|
||||
Tensor<float, 1> half_prec2(num_elem);
|
||||
Tensor<float, 1> full_prec2(num_elem);
|
||||
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(float));
|
||||
gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(float));
|
||||
// Synchronize before the host reads the copied data.
// NOTE(review): presumably the copies above are asynchronous on the stream - confirm.
gpu_device.synchronize();
|
||||
|
||||
// exp(): log each element, then require float and half results to agree.
for (int i = 0; i < num_elem; ++i) {
|
||||
std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl;
|
||||
VERIFY_IS_APPROX(full_prec1(i), half_prec1(i));
|
||||
}
|
||||
// log(): same check on the second input.
for (int i = 0; i < num_elem; ++i) {
|
||||
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
|
||||
VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
|
||||
}
|
||||
// Release every device buffer allocated at the top of the function.
gpu_device.deallocate(d_float1);
|
||||
gpu_device.deallocate(d_float2);
|
||||
gpu_device.deallocate(d_res1_half);
|
||||
gpu_device.deallocate(d_res1_float);
|
||||
gpu_device.deallocate(d_res2_half);
|
||||
gpu_device.deallocate(d_res2_float);
|
||||
}
|
||||
|
||||
|
||||
void test_cuda_contractions() {
|
||||
Eigen::CudaStreamDevice stream;
|
||||
@@ -280,6 +342,7 @@ void test_cxx11_tensor_of_float16_cuda()
|
||||
CALL_SUBTEST_1(test_cuda_conversion());
|
||||
CALL_SUBTEST_1(test_cuda_unary());
|
||||
CALL_SUBTEST_1(test_cuda_elementwise());
|
||||
CALL_SUBTEST_1(test_cuda_trancendental());
|
||||
CALL_SUBTEST_2(test_cuda_contractions());
|
||||
CALL_SUBTEST_3(test_cuda_reductions());
|
||||
CALL_SUBTEST_4(test_cuda_forced_evals());
|
||||
|
||||
@@ -195,7 +195,10 @@ static void test_3d()
|
||||
VERIFY_IS_EQUAL((epsilon(0,2,1)), -1);
|
||||
VERIFY_IS_EQUAL((epsilon(1,0,2)), -1);
|
||||
|
||||
array<Eigen::DenseIndex, 3> dims{{2,3,4}};
|
||||
array<Eigen::DenseIndex, 3> dims;
|
||||
dims[0] = 2;
|
||||
dims[1] = 3;
|
||||
dims[2] = 4;
|
||||
Tensor<int, 3> t1(dims);
|
||||
Tensor<int, 3, RowMajor> t2(dims);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user