From c01ff45312582b2ea896ee307a49165ca4790332 Mon Sep 17 00:00:00 2001 From: Charles Schlosser Date: Sat, 14 Dec 2024 14:25:04 +0000 Subject: [PATCH] Enable fill_n and memset optimizations for construction and assignment --- Eigen/src/Core/AssignEvaluator.h | 26 +++++++++++++ Eigen/src/Core/CwiseNullaryOp.h | 16 +++++--- Eigen/src/Core/DenseBase.h | 8 ++-- Eigen/src/Core/DiagonalMatrix.h | 7 +++- Eigen/src/Core/Fill.h | 46 ++++++++++++++++++----- Eigen/src/Core/functors/NullaryFunctors.h | 13 ++++++- Eigen/src/Core/util/ForwardDeclarations.h | 11 ++++++ 7 files changed, 105 insertions(+), 22 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index e70d5551a..895484166 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -888,6 +888,32 @@ struct Assignment { } }; +template +struct Assignment, SrcPlainObject>, + assign_op, Dense2Dense, Weak> { + using Scalar = typename DstXprType::Scalar; + using NullaryOp = scalar_constant_op; + using SrcXprType = CwiseNullaryOp; + using Functor = assign_op; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { + eigen_fill_impl::run(dst, src); + } +}; + +template +struct Assignment, SrcPlainObject>, + assign_op, Dense2Dense, Weak> { + using Scalar = typename DstXprType::Scalar; + using NullaryOp = scalar_zero_op; + using SrcXprType = CwiseNullaryOp; + using Functor = assign_op; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { + eigen_zero_impl::run(dst, src); + } +}; + // Generic assignment through evalTo. // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. // Note that the last template argument "Weak" is needed to make it possible to perform diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 86ddd5e4d..aa139bf3d 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -71,6 +71,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); } + EIGEN_DEVICE_FUNC CwiseNullaryOp(Index size, const NullaryOp& func = NullaryOp()) + : CwiseNullaryOp(RowsAtCompileTime == 1 ? 1 : size, RowsAtCompileTime == 1 ? size : 1, func) { + EIGEN_STATIC_ASSERT(CwiseNullaryOp::IsVectorAtCompileTime, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index cols() const { return m_cols.value(); } @@ -480,9 +484,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setEqualSpace * \sa Zero(), Zero(Index) */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero( +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero( Index rows, Index cols) { - return Constant(rows, cols, Scalar(0)); + return ZeroReturnType(rows, cols); } /** \returns an expression of a zero vector. @@ -502,9 +506,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::Constan * \sa Zero(), Zero(Index,Index) */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero( +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero( Index size) { - return Constant(size, Scalar(0)); + return ZeroReturnType(size); } /** \returns an expression of a fixed-size zero matrix or vector. @@ -518,8 +522,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::Constan * \sa Zero(Index), Zero(Index,Index) */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero() { - return Constant(Scalar(0)); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero() { + return ZeroReturnType(RowsAtCompileTime, ColsAtCompileTime); } /** \returns true if *this is approximately equal to the zero matrix, diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index e1fbb0b82..d5906bdb7 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -243,6 +243,8 @@ class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp, PlainObject> ConstantReturnType; + /** \internal Represents a matrix with all coefficients equal to zero*/ + typedef CwiseNullaryOp, PlainObject> ZeroReturnType; /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */ EIGEN_DEPRECATED typedef CwiseNullaryOp, PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ @@ -328,9 +330,9 @@ class DenseBase template EIGEN_DEVICE_FUNC static const CwiseNullaryOp NullaryExpr(const CustomNullaryOp& func); - EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); - EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size); - EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(Index size); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(); EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index fd61bb793..248e4586f 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -256,10 +256,13 @@ class DiagonalMatrix : public DiagonalBase, DiagonalVectorType>> InitializeReturnType; + typedef DiagonalWrapper, DiagonalVectorType>> + ZeroInitializeReturnType; + /** Initializes a diagonal matrix of size SizeAtCompileTime with coefficients set to zero */ - EIGEN_DEVICE_FUNC static const InitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); } + EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); } /** Initializes a diagonal matrix of size dim with coefficients set to zero */ - EIGEN_DEVICE_FUNC static const InitializeReturnType Zero(Index size) { + EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero(Index size) { return DiagonalVectorType::Zero(size).asDiagonal(); } /** Initializes a identity matrix of size SizeAtCompileTime */ diff --git a/Eigen/src/Core/Fill.h b/Eigen/src/Core/Fill.h index 30b36450e..7a8115e86 100644 --- a/Eigen/src/Core/Fill.h +++ b/Eigen/src/Core/Fill.h @@ -54,19 +54,26 @@ template struct eigen_fill_helper>> : eigen_fill_helper>> {}; -template ::value> -struct eigen_fill_impl { +template +struct eigen_fill_impl { using Scalar = typename Xpr::Scalar; using Func = scalar_constant_op; using PlainObject = typename Xpr::PlainObject; - using Constant = CwiseNullaryOp; + using Constant = typename PlainObject::ConstantReturnType; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { - dst = Constant(dst.rows(), dst.cols(), Func(val)); + const Constant src(dst.rows(), dst.cols(), val); + run(dst, src); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + call_dense_assignment_loop(dst, src, assign_op()); } }; -#if !EIGEN_COMP_MSVC -#ifndef EIGEN_GPU_COMPILE_PHASE +#if EIGEN_COMP_MSVC || defined(EIGEN_GPU_COMPILE_PHASE) +template +struct eigen_fill_impl : eigen_fill_impl {}; +#else template struct eigen_fill_impl { using Scalar = typename Xpr::Scalar; @@ -74,19 +81,33 @@ struct eigen_fill_impl { EIGEN_USING_STD(fill_n); fill_n(dst.data(), dst.size(), val); } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + resize_if_allowed(dst, src, assign_op()); + const Scalar& val = src.functor()(); + run(dst, val); + } }; #endif -#endif template struct eigen_memset_helper { static constexpr bool value = std::is_trivial::value && eigen_fill_helper::value; }; -template ::value> -struct eigen_zero_impl { +template +struct eigen_zero_impl { using Scalar = typename Xpr::Scalar; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { eigen_fill_impl::run(dst, Scalar(0)); } + using PlainObject = typename Xpr::PlainObject; + using Zero = typename PlainObject::ZeroReturnType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { + const Zero src(dst.rows(), dst.cols()); + run(dst, src); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + call_dense_assignment_loop(dst, src, assign_op()); + } }; template @@ -104,6 +125,11 @@ struct eigen_zero_impl { EIGEN_USING_STD(memset); memset(dst_ptr, 0, num_bytes); } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + resize_if_allowed(dst, src, assign_op()); + run(dst); + } }; } // namespace internal diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index a478b80df..14b56d733 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -19,7 +19,6 @@ namespace internal { template struct scalar_constant_op { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()() const { return m_other; } template @@ -37,6 +36,18 @@ struct functor_traits > { }; }; +template +struct scalar_zero_op { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_zero_op() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()() const { return Scalar(0); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { + return internal::pzero(PacketType()); + } +}; +template +struct functor_traits> : functor_traits> {}; + template struct scalar_identity_op { template diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index cf2535928..fcde64afe 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -505,6 +505,17 @@ struct stem_function { template struct DeviceWrapper; +namespace internal { +template +struct eigen_fill_helper; +template ::value> +struct eigen_fill_impl; +template +struct eigen_memset_helper; +template ::value> +struct eigen_zero_impl; +} // namespace internal + } // end namespace Eigen #endif // EIGEN_FORWARDDECLARATIONS_H