diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h index 3471d1056..2a224eddf 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h @@ -531,7 +531,7 @@ EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs, template -__device__ EIGEN_STRONG_INLINE void +__device__ __forceinline__ void EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs, const OutputMapper output, float2 lhs_shmem2[][16], float2 rhs_shmem2[][8], const Index m_size, @@ -771,7 +771,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh template -__device__ EIGEN_STRONG_INLINE void +__device__ __forceinline__ void EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, const OutputMapper output, float2 lhs_shmem2[][32], float2 rhs_shmem2[][8], const Index m_size, @@ -1335,7 +1335,7 @@ struct TensorEvaluator), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); } };