From 925fb6b93710b95082ba44d30405289dff3707eb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 10 Jun 2014 09:14:44 -0700 Subject: [PATCH] TensorEval are now typed on the device: this will make it possible to use partial template specialization to optimize the strategy of each evaluator for each device type. Started work on partial evaluations. --- .../Eigen/CXX11/src/Tensor/TensorAssign.h | 42 +++++----- .../Eigen/CXX11/src/Tensor/TensorBase.h | 14 +++- .../CXX11/src/Tensor/TensorContraction.h | 26 +++---- .../CXX11/src/Tensor/TensorConvolution.h | 20 ++--- .../Eigen/CXX11/src/Tensor/TensorDevice.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorDeviceType.h | 28 ++++++- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 76 +++++++++---------- .../src/Tensor/TensorForwardDeclarations.h | 9 ++- .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 14 ++-- 9 files changed, 129 insertions(+), 102 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index da1eb62cb..633a7a31b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -32,15 +32,15 @@ namespace Eigen { namespace internal { // Default strategy: the expressions are evaluated with a single cpu thread. 
-template::PacketAccess & TensorEvaluator::PacketAccess> +template::PacketAccess & TensorEvaluator::PacketAccess> struct TensorAssign { typedef typename Derived1::Index Index; EIGEN_DEVICE_FUNC - static inline void run(Derived1& dst, const Derived2& src) + static inline void run(Derived1& dst, const Derived2& src, const Device& device = Device()) { - TensorEvaluator evalDst(dst); - TensorEvaluator evalSrc(src); + TensorEvaluator evalDst(dst, device); + TensorEvaluator evalSrc(src, device); const Index size = dst.size(); for (Index i = 0; i < size; ++i) { evalDst.coeffRef(i) = evalSrc.coeff(i); @@ -49,19 +49,19 @@ struct TensorAssign }; -template -struct TensorAssign +template +struct TensorAssign { typedef typename Derived1::Index Index; - static inline void run(Derived1& dst, const Derived2& src) + static inline void run(Derived1& dst, const Derived2& src, const Device& device = Device()) { - TensorEvaluator evalDst(dst); - TensorEvaluator evalSrc(src); + TensorEvaluator evalDst(dst, device); + TensorEvaluator evalSrc(src, device); const Index size = dst.size(); - static const int LhsStoreMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; - static const int RhsLoadMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; - static const int PacketSize = unpacket_traits::PacketReturnType>::size; + static const int LhsStoreMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + static const int RhsLoadMode = TensorEvaluator::IsAligned ? 
Aligned : Unaligned; + static const int PacketSize = unpacket_traits::PacketReturnType>::size; const int VectorizedSize = (size / PacketSize) * PacketSize; for (Index i = 0; i < VectorizedSize; i += PacketSize) { @@ -116,12 +116,12 @@ struct TensorAssignMultiThreaded typedef typename Derived1::Index Index; static inline void run(Derived1& dst, const Derived2& src, const ThreadPoolDevice& device) { - TensorEvaluator evalDst(dst); - TensorEvaluator evalSrc(src); + TensorEvaluator evalDst(dst, DefaultDevice()); + TensorEvaluator evalSrc(src, DefaultDevice()); const Index size = dst.size(); - static const bool Vectorizable = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess; - static const int PacketSize = Vectorizable ? unpacket_traits::PacketReturnType>::size : 1; + static const bool Vectorizable = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess; + static const int PacketSize = Vectorizable ? unpacket_traits::PacketReturnType>::size : 1; int blocksz = static_cast(ceil(static_cast(size)/device.numThreads()) + PacketSize - 1); const Index blocksize = std::max(PacketSize, (blocksz - (blocksz % PacketSize))); @@ -131,7 +131,7 @@ struct TensorAssignMultiThreaded vector > results; results.reserve(numblocks); for (int i = 0; i < numblocks; ++i) { - results.push_back(std::async(std::launch::async, &EvalRange, TensorEvaluator, Index>::run, evalDst, evalSrc, i*blocksize, (i+1)*blocksize)); + results.push_back(std::async(std::launch::async, &EvalRange, TensorEvaluator, Index>::run, evalDst, evalSrc, i*blocksize, (i+1)*blocksize)); } for (int i = 0; i < numblocks; ++i) { @@ -167,19 +167,19 @@ struct TensorAssignGpu typedef typename Derived1::Index Index; static inline void run(Derived1& dst, const Derived2& src, const GpuDevice& device) { - TensorEvaluator evalDst(dst); - TensorEvaluator evalSrc(src); + TensorEvaluator evalDst(dst, device); + TensorEvaluator evalSrc(src, device); const Index size = dst.size(); const int block_size = std::min(size,
32*32); const int num_blocks = size / block_size; - EigenMetaKernelNoCheck, TensorEvaluator > <<>>(evalDst, evalSrc); + EigenMetaKernelNoCheck, TensorEvaluator > <<>>(evalDst, evalSrc); const int remaining_items = size % block_size; if (remaining_items > 0) { const int peel_start_offset = num_blocks * block_size; const int peel_block_size = std::min(size, 32); const int peel_num_blocks = (remaining_items + peel_block_size - 1) / peel_block_size; - EigenMetaKernelPeel, TensorEvaluator > <<>>(evalDst, evalSrc, peel_start_offset, size); + EigenMetaKernelPeel, TensorEvaluator > <<>>(evalDst, evalSrc, peel_start_offset, size); } } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index e447a5d40..6b53d2a3d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -198,19 +198,25 @@ class TensorBase } // Coefficient-wise ternary operators. - template - inline const TensorSelectOp + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSelectOp select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const { return TensorSelectOp(derived(), thenTensor.derived(), elseTensor.derived()); } // Morphing operators (slicing tbd). - template - inline const TensorReshapingOp + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp reshape(const NewDimensions& newDimensions) const { return TensorReshapingOp(derived(), newDimensions); } + // Force the evaluation of the expression. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorForcedEvalOp eval() const { + return TensorForcedEvalOp(derived()); + } + protected: template friend class TensorBase; EIGEN_DEVICE_FUNC diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 5149de1bb..cadbabda2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -102,31 +102,31 @@ template <> struct max_n_1<0> { }; -template -struct TensorEvaluator > +template +struct TensorEvaluator, Device> { typedef TensorContractionOp XprType; - static const int NumDims = max_n_1::Dimensions::count + TensorEvaluator::Dimensions::count - 2 * internal::array_size::value>::size; + static const int NumDims = max_n_1::Dimensions::count + TensorEvaluator::Dimensions::count - 2 * internal::array_size::value>::size; typedef typename XprType::Index Index; typedef DSizes Dimensions; enum { - IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = /*TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess */ false, }; - TensorEvaluator(const XprType& op) - : m_leftImpl(op.lhsExpression()), m_rightImpl(op.rhsExpression()) + TensorEvaluator(const XprType& op, const Device& device) + : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device) { Index index = 0; Index stride = 1; m_shiftright = 1; int skipped = 0; - const typename TensorEvaluator::Dimensions& left_dims = m_leftImpl.dimensions(); - for (int i = 0; i < TensorEvaluator::Dimensions::count; ++i) { + const typename TensorEvaluator::Dimensions& left_dims = m_leftImpl.dimensions(); + for (int i = 0; i < TensorEvaluator::Dimensions::count; ++i) { bool skip = false; for (int j = 0; j < internal::array_size::value; ++j) { if (op.indices()[j].first == i) { @@ -148,8 +148,8 @@ struct 
TensorEvaluator::Dimensions& right_dims = m_rightImpl.dimensions(); - for (int i = 0; i < TensorEvaluator::Dimensions::count; ++i) { + const typename TensorEvaluator::Dimensions& right_dims = m_rightImpl.dimensions(); + for (int i = 0; i < TensorEvaluator::Dimensions::count; ++i) { bool skip = false; for (int j = 0; j < internal::array_size::value; ++j) { if (op.indices()[j].second == i) { @@ -168,7 +168,7 @@ struct TensorEvaluator::Dimensions::count + TensorEvaluator::Dimensions::count == 2 * internal::array_size::value) { + if (TensorEvaluator::Dimensions::count + TensorEvaluator::Dimensions::count == 2 * internal::array_size::value) { m_dimensions[0] = 1; } } @@ -223,8 +223,8 @@ struct TensorEvaluator::value> m_stitchsize; Index m_shiftright; Dimensions m_dimensions; - TensorEvaluator m_leftImpl; - TensorEvaluator m_rightImpl; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 501e9a522..a554b8260 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -94,27 +94,27 @@ class TensorConvolutionOp : public TensorBase -struct TensorEvaluator > +template +struct TensorEvaluator, Device> { typedef TensorConvolutionOp XprType; - static const int NumDims = TensorEvaluator::Dimensions::count; + static const int NumDims = TensorEvaluator::Dimensions::count; static const int KernelDims = Indices::size; typedef typename XprType::Index Index; typedef DSizes Dimensions; enum { - IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = /*TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess */ false, }; - TensorEvaluator(const XprType& op) - : m_inputImpl(op.inputExpression()), m_kernelImpl(op.kernelExpression()), 
m_dimensions(op.inputExpression().dimensions()) + TensorEvaluator(const XprType& op, const Device& device) + : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_dimensions(op.inputExpression().dimensions()) { - const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); - const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); + const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); for (int i = 0; i < NumDims; ++i) { if (i > 0) { @@ -200,8 +200,8 @@ struct TensorEvaluator m_indexStride; array m_kernelStride; Dimensions m_dimensions; - TensorEvaluator m_inputImpl; - TensorEvaluator m_kernelImpl; + TensorEvaluator m_inputImpl; + TensorEvaluator m_kernelImpl; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index dbe60a165..ce524a818 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -31,7 +31,7 @@ template class TensorDevice { template EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { - internal::TensorAssign::run(m_expression, other); + internal::TensorAssign::run(m_expression, other, m_device); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h index d7f5ab7c9..142edda14 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h @@ -15,6 +15,12 @@ namespace Eigen { // Default device for the machine (typically a single cpu core) struct DefaultDevice { + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + 
internal::aligned_free(buffer); + } }; @@ -22,14 +28,19 @@ struct DefaultDevice { // We should really use a thread pool here but first we need to find a portable thread pool library. #ifdef EIGEN_USE_THREADS struct ThreadPoolDevice { - ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { } + ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { } size_t numThreads() const { return num_threads_; } - /*ThreadPool* threadPool() const { return pool_; }*/ + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } private: // todo: NUMA, ... size_t num_threads_; - /*ThreadPool* pool_;*/ }; #endif @@ -40,7 +51,16 @@ struct GpuDevice { // The cudastream is not owned: the caller is responsible for its initialization and eventual destruction. GpuDevice(const cudaStream_t* stream) : stream_(stream) { eigen_assert(stream); } - const cudaStream_t& stream() const { return *stream_; } + EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return *stream_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + void* result; + cudaMalloc(&result, num_bytes); + return result; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + cudaFree(buffer); + } private: // TODO: multigpu. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index ab2513cea..80fe06957 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -23,7 +23,7 @@ namespace Eigen { * leading to lvalues (slicing, reshaping, etc...) 
*/ -template +template struct TensorEvaluator { typedef typename Derived::Index Index; @@ -38,7 +38,7 @@ struct TensorEvaluator PacketAccess = Derived::PacketAccess, }; - EIGEN_DEVICE_FUNC TensorEvaluator(Derived& m) + EIGEN_DEVICE_FUNC TensorEvaluator(Derived& m, const Device&) : m_data(const_cast(m.data())), m_dims(m.dimensions()) { } @@ -73,8 +73,8 @@ struct TensorEvaluator // -------------------- CwiseNullaryOp -------------------- -template -struct TensorEvaluator > +template +struct TensorEvaluator, Device> { typedef TensorCwiseNullaryOp XprType; @@ -84,14 +84,14 @@ struct TensorEvaluator > }; EIGEN_DEVICE_FUNC - TensorEvaluator(const XprType& op) - : m_functor(op.functor()), m_argImpl(op.nestedExpression()) + TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } @@ -108,32 +108,32 @@ struct TensorEvaluator > private: const NullaryOp m_functor; - TensorEvaluator m_argImpl; + TensorEvaluator m_argImpl; }; // -------------------- CwiseUnaryOp -------------------- -template -struct TensorEvaluator > +template +struct TensorEvaluator, Device> { typedef TensorCwiseUnaryOp XprType; enum { - IsAligned = TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, }; - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op) + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_functor(op.functor()), - 
m_argImpl(op.nestedExpression()) + m_argImpl(op.nestedExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } @@ -150,33 +150,33 @@ struct TensorEvaluator > private: const UnaryOp m_functor; - TensorEvaluator m_argImpl; + TensorEvaluator m_argImpl; }; // -------------------- CwiseBinaryOp -------------------- -template -struct TensorEvaluator > +template +struct TensorEvaluator, Device> { typedef TensorCwiseBinaryOp XprType; enum { - IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, }; - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op) + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : m_functor(op.functor()), - m_leftImpl(op.lhsExpression()), - m_rightImpl(op.rhsExpression()) + m_leftImpl(op.lhsExpression(), device), + m_rightImpl(op.rhsExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { @@ -196,34 +196,34 @@ struct TensorEvaluator m_leftImpl; - TensorEvaluator m_rightImpl; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; }; // -------------------- SelectOp -------------------- -template -struct 
TensorEvaluator > +template +struct TensorEvaluator, Device> { typedef TensorSelectOp XprType; enum { - IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess/* & + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess/* & TensorEvaluator::PacketAccess*/, }; - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op) - : m_condImpl(op.ifExpression()), - m_thenImpl(op.thenExpression()), - m_elseImpl(op.elseExpression()) + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_condImpl(op.ifExpression(), device), + m_thenImpl(op.thenExpression(), device), + m_elseImpl(op.elseExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; - typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { @@ -248,9 +248,9 @@ struct TensorEvaluator } private: - TensorEvaluator m_condImpl; - TensorEvaluator m_thenImpl; - TensorEvaluator m_elseImpl; + TensorEvaluator m_condImpl; + TensorEvaluator m_thenImpl; + TensorEvaluator m_elseImpl; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 1fb90478f..27bfe1d73 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -21,16 +21,17 @@ template class TensorCwiseNullaryO template class TensorCwiseUnaryOp; template class TensorCwiseBinaryOp; template class TensorSelectOp; +template class TensorReductionOp; template class TensorContractionOp; template class TensorConvolutionOp; template class TensorReshapingOp; 
-template class TensorDevice; +template class TensorForcedEvalOp; -// Move to internal? -template struct TensorEvaluator; +template class TensorDevice; +template struct TensorEvaluator; namespace internal { -template struct TensorAssign; +template struct TensorAssign; } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 7d5f9271e..e9e74581f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -77,19 +77,19 @@ class TensorReshapingOp : public TensorBase -struct TensorEvaluator > +template +struct TensorEvaluator, Device> { typedef TensorReshapingOp XprType; typedef NewDimensions Dimensions; enum { - IsAligned = TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess, + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, }; - TensorEvaluator(const XprType& op) - : m_impl(op.expression()), m_dimensions(op.dimensions()) + TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_dimensions(op.dimensions()) { } typedef typename XprType::Index Index; @@ -111,7 +111,7 @@ struct TensorEvaluator > private: NewDimensions m_dimensions; - TensorEvaluator m_impl; + TensorEvaluator m_impl; };