From 554982beeffcf28457d3989d3cbe7d2968b056bf Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Fri, 8 Oct 2021 11:38:13 -0700 Subject: [PATCH] Disable Tree reduction for GPU. For moderately sized inputs, running the Tree reduction quickly fills/overflows the GPU thread stack space, leading to memory errors. This was happening in the `cxx11_tensor_complex_gpu` test, for example. Disabling tree reduction on GPU fixes this. (cherry picked from commit 24ebb37f38287d65c0e0b60c714e39faffeb5b94) --- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 583f46256..ff7c5a133 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -166,8 +166,12 @@ struct GenericDimReducer<-1, Self, Op> { }; template + bool UseTreeReduction = (!Self::ReducerTraits::IsStateful && + !Self::ReducerTraits::IsExactlyAssociative && + // GPU threads can quickly run out of stack space + // for moderately sized inputs. + !Self::RunningOnGPU + )> struct InnerMostDimReducer { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { typename Self::CoeffReturnType accum = reducer.initialize(); @@ -528,6 +532,18 @@ struct TensorReductionEvaluatorBase::size; + + // For full reductions +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) + static constexpr bool RunningOnGPU = internal::is_same::value; + static constexpr bool RunningOnSycl = false; +#elif defined(EIGEN_USE_SYCL) +static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; +static const bool RunningOnGPU = false; +#else + static constexpr bool RunningOnGPU = false; + static constexpr bool RunningOnSycl = false; +#endif enum { IsAligned = false, @@ -950,17 +966,6 @@ struct TensorReductionEvaluatorBase::value; - static const bool RunningOnSycl = false; -#elif defined(EIGEN_USE_SYCL) -static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; -static const bool RunningOnGPU = false; -#else - static const bool RunningOnGPU = false; - static const bool RunningOnSycl = false; -#endif EvaluatorPointerType m_result; const Device EIGEN_DEVICE_REF m_device;