From bd21aba1817f76f4e72ddf3c55ef23d4a62ed6f7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sat, 30 Jan 2016 11:47:09 -0800 Subject: [PATCH] Sharded the cxx11_tensor_cuda test and fixed a memory leak --- unsupported/test/cxx11_tensor_cuda.cu | 131 +++++++++++++++++--------- 1 file changed, 88 insertions(+), 43 deletions(-) diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 79f1c5315..60f9314a5 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -63,6 +63,10 @@ void test_cuda_elementwise_small() { out(Eigen::array(i)), in1(Eigen::array(i)) + in2(Eigen::array(i))); } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); } void test_cuda_elementwise() @@ -113,6 +117,11 @@ void test_cuda_elementwise() } } } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); } void test_cuda_reduction() @@ -158,10 +167,13 @@ void test_cuda_reduction() VERIFY_IS_APPROX(out(i,j), expected); } } + + cudaFree(d_in1); + cudaFree(d_out); } template -static void test_cuda_contraction() +void test_cuda_contraction() { // with these dimensions, the output has 300 * 140 elements, which is // more than 30 * 1024, which is the number of threads in blocks on @@ -216,10 +228,14 @@ static void test_cuda_contraction() assert(false); } } + + cudaFree(d_t_left); + cudaFree(d_t_right); + cudaFree(d_t_result); } template -static void test_cuda_convolution_1d() +void test_cuda_convolution_1d() { Tensor input(74,37,11,137); Tensor kernel(4); @@ -266,9 +282,13 @@ static void test_cuda_convolution_1d() } } } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); } -static void test_cuda_convolution_inner_dim_col_major_1d() +void test_cuda_convolution_inner_dim_col_major_1d() { Tensor input(74,9,11,7); Tensor kernel(4); @@ -315,9 +335,13 @@ static void test_cuda_convolution_inner_dim_col_major_1d() } } } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); } -static void test_cuda_convolution_inner_dim_row_major_1d() +void test_cuda_convolution_inner_dim_row_major_1d() { Tensor input(7,9,11,74); Tensor kernel(4); @@ -364,10 +388,14 @@ static void test_cuda_convolution_inner_dim_row_major_1d() } } } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); } template -static void test_cuda_convolution_2d() +void test_cuda_convolution_2d() { Tensor input(74,37,11,137); Tensor kernel(3,4); @@ -424,10 +452,14 @@ static void test_cuda_convolution_2d() } } } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); } template -static void test_cuda_convolution_3d() +void test_cuda_convolution_3d() { Tensor input(Eigen::array(74,37,11,137,17)); Tensor kernel(3,4,2); @@ -498,6 +530,10 @@ static void test_cuda_convolution_3d() } } } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); } @@ -535,6 +571,9 @@ void test_cuda_lgamma(const Scalar stddev) VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j))); } } + + cudaFree(d_in); + cudaFree(d_out); } template @@ -571,6 +610,9 @@ void test_cuda_erf(const Scalar stddev) VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j))); } } + + cudaFree(d_in); + cudaFree(d_out); } template @@ -607,47 +649,50 @@ void test_cuda_erfc(const Scalar stddev) VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j))); } } + + cudaFree(d_in); + cudaFree(d_out); } void test_cxx11_tensor_cuda() { - CALL_SUBTEST(test_cuda_elementwise_small()); - CALL_SUBTEST(test_cuda_elementwise()); - CALL_SUBTEST(test_cuda_reduction()); - CALL_SUBTEST(test_cuda_contraction()); - CALL_SUBTEST(test_cuda_contraction()); - CALL_SUBTEST(test_cuda_convolution_1d()); - CALL_SUBTEST(test_cuda_convolution_1d()); - CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d()); - CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d()); - CALL_SUBTEST(test_cuda_convolution_2d()); - CALL_SUBTEST(test_cuda_convolution_2d()); - CALL_SUBTEST(test_cuda_convolution_3d()); - CALL_SUBTEST(test_cuda_convolution_3d()); - CALL_SUBTEST(test_cuda_lgamma(1.0f)); - CALL_SUBTEST(test_cuda_lgamma(100.0f)); - CALL_SUBTEST(test_cuda_lgamma(0.01f)); - CALL_SUBTEST(test_cuda_lgamma(0.001f)); - CALL_SUBTEST(test_cuda_erf(1.0f)); - CALL_SUBTEST(test_cuda_erf(100.0f)); - CALL_SUBTEST(test_cuda_erf(0.01f)); - CALL_SUBTEST(test_cuda_erf(0.001f)); - CALL_SUBTEST(test_cuda_erfc(1.0f)); + CALL_SUBTEST_1(test_cuda_elementwise_small()); + CALL_SUBTEST_1(test_cuda_elementwise()); + CALL_SUBTEST_1(test_cuda_reduction()); + CALL_SUBTEST_2(test_cuda_contraction()); + CALL_SUBTEST_2(test_cuda_contraction()); + CALL_SUBTEST_3(test_cuda_convolution_1d()); + CALL_SUBTEST_3(test_cuda_convolution_1d()); + CALL_SUBTEST_3(test_cuda_convolution_inner_dim_col_major_1d()); + CALL_SUBTEST_3(test_cuda_convolution_inner_dim_row_major_1d()); + CALL_SUBTEST_3(test_cuda_convolution_2d()); + CALL_SUBTEST_3(test_cuda_convolution_2d()); + CALL_SUBTEST_3(test_cuda_convolution_3d()); + CALL_SUBTEST_3(test_cuda_convolution_3d()); + CALL_SUBTEST_4(test_cuda_lgamma(1.0f)); + CALL_SUBTEST_4(test_cuda_lgamma(100.0f)); + CALL_SUBTEST_4(test_cuda_lgamma(0.01f)); + CALL_SUBTEST_4(test_cuda_lgamma(0.001f)); + CALL_SUBTEST_4(test_cuda_erf(1.0f)); + CALL_SUBTEST_4(test_cuda_erf(100.0f)); + CALL_SUBTEST_4(test_cuda_erf(0.01f)); + CALL_SUBTEST_4(test_cuda_erf(0.001f)); + CALL_SUBTEST_4(test_cuda_erfc(1.0f)); // CALL_SUBTEST(test_cuda_erfc(100.0f)); - CALL_SUBTEST(test_cuda_erfc(5.0f)); // CUDA erfc lacks precision for large inputs - CALL_SUBTEST(test_cuda_erfc(0.01f)); - CALL_SUBTEST(test_cuda_erfc(0.001f)); - CALL_SUBTEST(test_cuda_lgamma(1.0)); - CALL_SUBTEST(test_cuda_lgamma(100.0)); - CALL_SUBTEST(test_cuda_lgamma(0.01)); - CALL_SUBTEST(test_cuda_lgamma(0.001)); - CALL_SUBTEST(test_cuda_erf(1.0)); - CALL_SUBTEST(test_cuda_erf(100.0)); - CALL_SUBTEST(test_cuda_erf(0.01)); - CALL_SUBTEST(test_cuda_erf(0.001)); - CALL_SUBTEST(test_cuda_erfc(1.0)); + CALL_SUBTEST_4(test_cuda_erfc(5.0f)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST_4(test_cuda_erfc(0.01f)); + CALL_SUBTEST_4(test_cuda_erfc(0.001f)); + CALL_SUBTEST_4(test_cuda_lgamma(1.0)); + CALL_SUBTEST_4(test_cuda_lgamma(100.0)); + CALL_SUBTEST_4(test_cuda_lgamma(0.01)); + CALL_SUBTEST_4(test_cuda_lgamma(0.001)); + CALL_SUBTEST_4(test_cuda_erf(1.0)); + CALL_SUBTEST_4(test_cuda_erf(100.0)); + CALL_SUBTEST_4(test_cuda_erf(0.01)); + CALL_SUBTEST_4(test_cuda_erf(0.001)); + CALL_SUBTEST_4(test_cuda_erfc(1.0)); // CALL_SUBTEST(test_cuda_erfc(100.0)); - CALL_SUBTEST(test_cuda_erfc(5.0)); // CUDA erfc lacks precision for large inputs - CALL_SUBTEST(test_cuda_erfc(0.01)); - CALL_SUBTEST(test_cuda_erfc(0.001)); + CALL_SUBTEST_4(test_cuda_erfc(5.0)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST_4(test_cuda_erfc(0.01)); + CALL_SUBTEST_4(test_cuda_erfc(0.001)); }