diff --git a/Eigen/Core b/Eigen/Core index c4bde63f1..4d23920d5 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -373,7 +373,6 @@ using std::ptrdiff_t; #include "src/Core/arch/AVX512/GemmKernel.h" #endif -#include "src/Core/BooleanRedux.h" #include "src/Core/Select.h" #include "src/Core/VectorwiseOp.h" #include "src/Core/PartialReduxEvaluator.h" diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h deleted file mode 100644 index e7d1a348b..000000000 --- a/Eigen/src/Core/BooleanRedux.h +++ /dev/null @@ -1,166 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ALLANDANY_H -#define EIGEN_ALLANDANY_H - -#include "./InternalHeaderCheck.h" - -namespace Eigen { - -namespace internal { - -template -struct all_unroller -{ - enum { - IsRowMajor = (int(Derived::Flags) & int(RowMajor)), - i = (UnrollCount-1) / InnerSize, - j = (UnrollCount-1) % InnerSize - }; - - EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat) - { - return all_unroller::run(mat) && mat.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i) != typename Derived::CoeffReturnType(0); - } -}; - -template -struct all_unroller -{ - EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; } -}; - -template -struct all_unroller -{ - EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; } -}; - -template -struct any_unroller -{ - enum { - IsRowMajor = (int(Derived::Flags) & int(RowMajor)), - i = (UnrollCount-1) / InnerSize, - j = (UnrollCount-1) % InnerSize - }; - - EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat) - { - return any_unroller::run(mat) || mat.coeff(IsRowMajor ? i : j, IsRowMajor ? 
j : i) != typename Derived::CoeffReturnType(0); - } -}; - -template -struct any_unroller -{ - EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; } -}; - -template -struct any_unroller -{ - EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; } -}; - -} // end namespace internal - -/** \returns true if all coefficients are true - * - * Example: \include MatrixBase_all.cpp - * Output: \verbinclude MatrixBase_all.out - * - * \sa any(), Cwise::operator<() - */ -template -EIGEN_DEVICE_FUNC inline bool DenseBase::all() const -{ - typedef internal::evaluator Evaluator; - enum { - unroll = SizeAtCompileTime != Dynamic - && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits::AddCost)) <= EIGEN_UNROLLING_LIMIT, - }; - Evaluator evaluator(derived()); - if(unroll) - return internal::all_unroller::run(evaluator); - else - { - for(Index i = 0; i < derived().outerSize(); ++i) - for(Index j = 0; j < derived().innerSize(); ++j) - if (evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? j : i) == Scalar(0)) return false; - return true; - } -} - -/** \returns true if at least one coefficient is true - * - * \sa all() - */ -template -EIGEN_DEVICE_FUNC inline bool DenseBase::any() const -{ - typedef internal::evaluator Evaluator; - enum { - unroll = SizeAtCompileTime != Dynamic - && SizeAtCompileTime * (int(Evaluator::CoeffReadCost) + int(NumTraits::AddCost)) <= EIGEN_UNROLLING_LIMIT, - }; - Evaluator evaluator(derived()); - if(unroll) - return internal::any_unroller::run(evaluator); - else - { - for(Index i = 0; i < derived().outerSize(); ++i) - for(Index j = 0; j < derived().innerSize(); ++j) - if (evaluator.coeff(IsRowMajor ? i : j, IsRowMajor ? 
j : i) != Scalar(0)) return true; - return false; - } -} - -/** \returns the number of coefficients which evaluate to true - * - * \sa all(), any() - */ -template -EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase::count() const -{ - return derived().template cast().template cast().sum(); -} - -/** \returns true is \c *this contains at least one Not A Number (NaN). - * - * \sa allFinite() - */ -template -EIGEN_DEVICE_FUNC inline bool DenseBase::hasNaN() const -{ -#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) - return derived().array().isNaN().any(); -#else - return !((derived().array()==derived().array()).all()); -#endif -} - -/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. - * - * \sa hasNaN() - */ -template -EIGEN_DEVICE_FUNC inline bool DenseBase::allFinite() const -{ -#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) - return derived().array().isFinite().all(); -#else - return !((derived()-derived()).hasNaN()); -#endif -} - -} // end namespace Eigen - -#endif // EIGEN_ALLANDANY_H diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index b85747b22..2f83ba878 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -16,105 +16,295 @@ namespace Eigen { namespace internal { -template::PacketAccess)> +template ::PacketAccess), bool LinearAccess = false, + bool ShortCircuitEvaluation = false> struct visitor_impl; -template -struct visitor_impl -{ - enum { - col = Derived::IsRowMajor ? (UnrollCount-1) % Derived::ColsAtCompileTime - : (UnrollCount-1) / Derived::RowsAtCompileTime, - row = Derived::IsRowMajor ? 
(UnrollCount-1) / Derived::ColsAtCompileTime - : (UnrollCount-1) % Derived::RowsAtCompileTime - }; - - EIGEN_DEVICE_FUNC - static inline void run(const Derived &mat, Visitor& visitor) - { - visitor_impl::run(mat, visitor); - visitor(mat.coeff(row, col), row, col); +template +struct short_circuit_eval_impl { + // if short circuit evaluation is not used, do nothing + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Visitor&) { return false; } +}; +template +struct short_circuit_eval_impl { + // if short circuit evaluation is used, check the visitor + static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Visitor& visitor) { + return visitor.done(); } }; -template -struct visitor_impl -{ - EIGEN_DEVICE_FUNC - static inline void run(const Derived &mat, Visitor& visitor) +// unrolled inner-outer traversal +template +struct visitor_impl { + // don't use short circuit evaluation for unrolled version + using Scalar = typename Derived::Scalar; + using Packet = typename packet_traits::type; + static constexpr bool RowMajor = Derived::IsRowMajor; + static constexpr int RowsAtCompileTime = Derived::RowsAtCompileTime; + static constexpr int ColsAtCompileTime = Derived::ColsAtCompileTime; + static constexpr int PacketSize = packet_traits::size; + + static constexpr bool CanVectorize(int K) { + constexpr int InnerSizeAtCompileTime = RowMajor ? 
ColsAtCompileTime : RowsAtCompileTime; + return Vectorize && (InnerSizeAtCompileTime - (K % InnerSizeAtCompileTime) >= PacketSize); + } + + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived&, Visitor&) {} + + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { - return visitor.init(mat.coeff(0, 0), 0, 0); + visitor.init(mat.coeff(0, 0), 0, 0); + run<1>(mat, visitor); + } + + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) + { + static constexpr int R = RowMajor ? (K / ColsAtCompileTime) : (K % RowsAtCompileTime); + static constexpr int C = RowMajor ? (K % ColsAtCompileTime) : (K / RowsAtCompileTime); + visitor(mat.coeff(R, C), R, C); + run(mat, visitor); + } + + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) + { + Packet P = mat.template packet(0, 0); + visitor.initpacket(P, 0, 0); + run(mat, visitor); + } + + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) + { + static constexpr int R = RowMajor ? (K / ColsAtCompileTime) : (K % RowsAtCompileTime); + static constexpr int C = RowMajor ? 
(K % ColsAtCompileTime) : (K / RowsAtCompileTime); + Packet P = mat.template packet(R, C); + visitor.packet(P, R, C); + run(mat, visitor); } }; -// This specialization enables visitors on empty matrices at compile-time -template -struct visitor_impl { - EIGEN_DEVICE_FUNC - static inline void run(const Derived &/*mat*/, Visitor& /*visitor*/) - {} +// unrolled linear traversal +template +struct visitor_impl { + // don't use short circuit evaluation for unrolled version + using Scalar = typename Derived::Scalar; + using Packet = typename packet_traits::type; + static constexpr int PacketSize = packet_traits::size; + + static constexpr bool CanVectorize(int K) { + return Vectorize && ((UnrollCount - K) >= PacketSize); + } + + // empty + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived&, Visitor&) {} + + // scalar initialization + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + visitor.init(mat.coeff(0), 0); + run<1>(mat, visitor); + } + + // scalar iteration + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + visitor(mat.coeff(K), K); + run(mat, visitor); + } + + // vector initialization + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + Packet P = mat.template packet(0); + visitor.initpacket(P, 0); + run(mat, visitor); + } + + // vector iteration + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + Packet P = mat.template packet(K); + visitor.packet(P, K); + run(mat, visitor); + } }; -template -struct visitor_impl -{ - EIGEN_DEVICE_FUNC - static inline void run(const Derived& mat, Visitor& visitor) - { - visitor.init(mat.coeff(0,0), 0, 0); - if (Derived::IsRowMajor) { - for(Index i = 1; i < mat.cols(); ++i) { - visitor(mat.coeff(0, i), 0, i); +// dynamic scalar 
outer-inner 
traversal +template +struct visitor_impl { + using short_circuit = short_circuit_eval_impl; + static constexpr bool RowMajor = Derived::IsRowMajor; + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + const Index innerSize = RowMajor ? mat.cols() : mat.rows(); + const Index outerSize = RowMajor ? mat.rows() : mat.cols(); + if (innerSize == 0 || outerSize == 0) return; + { + visitor.init(mat.coeff(0, 0), 0, 0); + if (short_circuit::run(visitor)) return; + for (Index i = 1; i < innerSize; ++i) { + Index r = RowMajor ? 0 : i; + Index c = RowMajor ? i : 0; + visitor(mat.coeff(r, c), r, c); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; } - for(Index j = 1; j < mat.rows(); ++j) { - for(Index i = 0; i < mat.cols(); ++i) { - visitor(mat.coeff(j, i), j, i); - } - } - } else { - for(Index i = 1; i < mat.rows(); ++i) { - visitor(mat.coeff(i, 0), i, 0); - } - for(Index j = 1; j < mat.cols(); ++j) { - for(Index i = 0; i < mat.rows(); ++i) { - visitor(mat.coeff(i, j), i, j); - } + } + for (Index j = 1; j < outerSize; j++) { + for (Index i = 0; i < innerSize; ++i) { + Index r = RowMajor ? j : i; + Index c = RowMajor ? 
i : j; + visitor(mat.coeff(r, c), r, c); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; } } } }; -template -struct visitor_impl -{ - typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type Packet; +// dynamic vectorized outer-inner traversal +template +struct visitor_impl { + using Scalar = typename Derived::Scalar; + using Packet = typename packet_traits::type; + static constexpr int PacketSize = packet_traits::size; + using short_circuit = short_circuit_eval_impl; + static constexpr bool RowMajor = Derived::IsRowMajor; - EIGEN_DEVICE_FUNC - static inline void run(const Derived& mat, Visitor& visitor) - { - const Index PacketSize = packet_traits::size; - visitor.init(mat.coeff(0,0), 0, 0); - if (Derived::IsRowMajor) { - for(Index i = 0; i < mat.rows(); ++i) { - Index j = i == 0 ? 1 : 0; - for(; j+PacketSize-1 < mat.cols(); j += PacketSize) { - Packet p = mat.packet(i, j); - visitor.packet(p, i, j); - } - for(; j < mat.cols(); ++j) - visitor(mat.coeff(i, j), i, j); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + const Index innerSize = RowMajor ? mat.cols() : mat.rows(); + const Index outerSize = RowMajor ? mat.rows() : mat.cols(); + if (innerSize == 0 || outerSize == 0) return; + { + Index i = 0; + if (innerSize < PacketSize) { + visitor.init(mat.coeff(0, 0), 0, 0); + i = 1; + } else { + Packet p = mat.template packet(0, 0); + visitor.initpacket(p, 0, 0); + i = PacketSize; } + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + for (; i + PacketSize - 1 < innerSize; i += PacketSize) { + Index r = RowMajor ? 0 : i; + Index c = RowMajor ? i : 0; + Packet p = mat.template packet(r, c); + visitor.packet(p, r, c); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + } + for (; i < innerSize; ++i) { + Index r = RowMajor ? 0 : i; + Index c = RowMajor ? 
i : 0; + visitor(mat.coeff(r, c), r, c); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + } + } + for (Index j = 1; j < outerSize; j++) { + Index i = 0; + for (; i + PacketSize - 1 < innerSize; i += PacketSize) { + Index r = RowMajor ? j : i; + Index c = RowMajor ? i : j; + Packet p = mat.template packet(r, c); + visitor.packet(p, r, c); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + } + for (; i < innerSize; ++i) { + Index r = RowMajor ? j : i; + Index c = RowMajor ? i : j; + visitor(mat.coeff(r, c), r, c); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + } + } + } +}; + +// dynamic scalar linear traversal +template +struct visitor_impl { + using short_circuit = short_circuit_eval_impl; + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + const Index size = mat.size(); + if (size == 0) return; + visitor.init(mat.coeff(0), 0); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + for (Index k = 1; k < size; k++) { + visitor(mat.coeff(k), k); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + } + } +}; + +// dynamic vectorized linear traversal +template +struct visitor_impl { + using Scalar = typename Derived::Scalar; + using Packet = typename packet_traits::type; + static constexpr int PacketSize = packet_traits::size; + using short_circuit = short_circuit_eval_impl; + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& mat, Visitor& visitor) { + const Index size = mat.size(); + if (size == 0) return; + Index k = 0; + if (size < PacketSize) { + visitor.init(mat.coeff(0), 0); + k = 1; } else { - for(Index j = 0; j < mat.cols(); ++j) { - Index i = j == 0 ? 
1 : 0; - for(; i+PacketSize-1 < mat.rows(); i += PacketSize) { - Packet p = mat.packet(i, j); - visitor.packet(p, i, j); - } - for(; i < mat.rows(); ++i) - visitor(mat.coeff(i, j), i, j); - } + Packet p = mat.template packet(k); + visitor.initpacket(p, k); + k = PacketSize; + } + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + for (; k + PacketSize - 1 < size; k += PacketSize) { + Packet p = mat.template packet(k); + visitor.packet(p, k); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; + } + for (; k < size; k++) { + visitor(mat.coeff(k), k); + if EIGEN_PREDICT_FALSE(short_circuit::run(visitor)) return; } } }; @@ -124,38 +314,77 @@ template class visitor_evaluator { public: - typedef internal::evaluator Evaluator; - - enum { - PacketAccess = Evaluator::Flags & PacketAccessBit, - IsRowMajor = XprType::IsRowMajor, - RowsAtCompileTime = XprType::RowsAtCompileTime, - ColsAtCompileTime = XprType::ColsAtCompileTime, - CoeffReadCost = Evaluator::CoeffReadCost - }; + typedef evaluator Evaluator; + typedef typename XprType::Scalar Scalar; + using Packet = typename packet_traits::type; + typedef std::remove_const_t CoeffReturnType; + static constexpr bool PacketAccess = static_cast(Evaluator::Flags & PacketAccessBit); + static constexpr bool LinearAccess = static_cast(Evaluator::Flags & LinearAccessBit); + static constexpr bool IsRowMajor = static_cast(XprType::IsRowMajor); + static constexpr int RowsAtCompileTime = XprType::RowsAtCompileTime; + static constexpr int ColsAtCompileTime = XprType::ColsAtCompileTime; + static constexpr int XprAlignment = Evaluator::Alignment; + static constexpr int CoeffReadCost = Evaluator::CoeffReadCost; EIGEN_DEVICE_FUNC explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) { } - typedef typename XprType::Scalar Scalar; - typedef std::remove_const_t CoeffReturnType; - typedef std::remove_const_t PacketReturnType; - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return 
m_xpr.rows(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.cols(); } EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_xpr.size(); } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const - { return m_evaluator.coeff(row, col); } - EIGEN_DEVICE_FUNC PacketReturnType packet(Index row, Index col) const - { return m_evaluator.template packet(row, col); } + // outer-inner access + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_evaluator.coeff(row, col); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(Index row, Index col) const { + return m_evaluator.template packet(row, col); + } + // linear access + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_evaluator.coeff(index); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(Index index) const { + return m_evaluator.template packet(index); + } protected: Evaluator m_evaluator; const XprType &m_xpr; }; +template +struct visit_impl { + using Evaluator = visitor_evaluator; + using Scalar = typename DenseBase::Scalar; + + static constexpr bool IsRowMajor = DenseBase::IsRowMajor; + static constexpr int SizeAtCompileTime = DenseBase::SizeAtCompileTime; + static constexpr int RowsAtCompileTime = DenseBase::RowsAtCompileTime; + static constexpr int ColsAtCompileTime = DenseBase::ColsAtCompileTime; + static constexpr int InnerSizeAtCompileTime = IsRowMajor ? ColsAtCompileTime : RowsAtCompileTime; + static constexpr int OuterSizeAtCompileTime = IsRowMajor ? RowsAtCompileTime : ColsAtCompileTime; + + static constexpr bool LinearAccess = Evaluator::LinearAccess && static_cast(functor_traits::LinearAccess); + static constexpr bool Vectorize = Evaluator::PacketAccess && static_cast(functor_traits::PacketAccess); + + static constexpr int PacketSize = packet_traits::size; + static constexpr int VectorOps = Vectorize ? 
(LinearAccess ? (SizeAtCompileTime / PacketSize) : (OuterSizeAtCompileTime * (InnerSizeAtCompileTime / PacketSize))) : 0; + static constexpr int ScalarOps = SizeAtCompileTime - (VectorOps * PacketSize); + // treat vector op and scalar op as same cost for unroll logic + static constexpr int TotalOps = VectorOps + ScalarOps; + + static constexpr int UnrollCost = int(Evaluator::CoeffReadCost) + int(functor_traits::Cost); + static constexpr bool Unroll = (SizeAtCompileTime != Dynamic) && ((TotalOps * UnrollCost) <= EIGEN_UNROLLING_LIMIT); + static constexpr int UnrollCount = Unroll ? int(SizeAtCompileTime) : Dynamic; + + + using impl = visitor_impl; + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const DenseBase& mat, Visitor& visitor) { + Evaluator evaluator(mat.derived()); + impl::run(evaluator, visitor); + } +}; + } // end namespace internal /** Applies the visitor \a visitor to the whole coefficients of the matrix or vector. @@ -182,17 +411,8 @@ template EIGEN_DEVICE_FUNC void DenseBase::visit(Visitor& visitor) const { - if(size()==0) - return; - - typedef typename internal::visitor_evaluator ThisEvaluator; - ThisEvaluator thisEval(derived()); - - enum { - unroll = SizeAtCompileTime != Dynamic - && SizeAtCompileTime * int(ThisEvaluator::CoeffReadCost) + (SizeAtCompileTime-1) * int(internal::functor_traits::Cost) <= EIGEN_UNROLLING_LIMIT - }; - return internal::visitor_impl::run(thisEval, visitor); + using impl = internal::visit_impl; + impl::run(derived(), visitor); } namespace internal { @@ -219,73 +439,72 @@ struct coeff_visitor }; -template +template struct minmax_compare { typedef typename packet_traits::type Packet; static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a < b; } - static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_min(p);} + static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_min(p); } }; -template +template struct minmax_compare { typedef typename 
packet_traits::type Packet; static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a > b; } - static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_max(p);} + static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_max(p); } }; template -struct minmax_coeff_visitor : coeff_visitor -{ +struct minmax_coeff_visitor : coeff_visitor { using Scalar = typename Derived::Scalar; using Packet = typename packet_traits::type; using Comparator = minmax_compare; + static constexpr Index PacketSize = packet_traits::size; - EIGEN_DEVICE_FUNC inline - void operator() (const Scalar& value, Index i, Index j) - { - if(Comparator::compare(value, this->res)) { + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index i, Index j) { + if (Comparator::compare(value, this->res)) { this->res = value; this->row = i; this->col = j; } } - - EIGEN_DEVICE_FUNC inline - void packet(const Packet& p, Index i, Index j) { - const Index PacketSize = packet_traits::size; + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index i, Index j) { Scalar value = Comparator::predux(p); if (Comparator::compare(value, this->res)) { const Packet range = preverse(plset(Scalar(1))); Packet mask = pcmp_eq(pset1(value), p); Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); this->res = value; - this->row = Derived::IsRowMajor ? i : i + max_idx;; + this->row = Derived::IsRowMajor ? i : i + max_idx; this->col = Derived::IsRowMajor ? j + max_idx : j; } } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index i, Index j) { + Scalar value = Comparator::predux(p); + const Packet range = preverse(plset(Scalar(1))); + Packet mask = pcmp_eq(pset1(value), p); + Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); + this->res = value; + this->row = Derived::IsRowMajor ? i : i + max_idx; + this->col = Derived::IsRowMajor ? j + max_idx : j; + } }; // Suppress NaN. 
The only case in which we return NaN is if the matrix is all NaN, in which case, // the row=0, col=0 is returned for the location. template -struct minmax_coeff_visitor : coeff_visitor -{ +struct minmax_coeff_visitor : coeff_visitor { typedef typename Derived::Scalar Scalar; using Packet = typename packet_traits::type; using Comparator = minmax_compare; - EIGEN_DEVICE_FUNC inline - void operator() (const Scalar& value, Index i, Index j) - { + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index i, Index j) { if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) { this->res = value; this->row = i; this->col = j; } } - - EIGEN_DEVICE_FUNC inline - void packet(const Packet& p, Index i, Index j) { + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index i, Index j) { const Index PacketSize = packet_traits::size; Scalar value = Comparator::predux(p); if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) { @@ -298,21 +517,28 @@ struct minmax_coeff_visitor : coeff_visitorcol = Derived::IsRowMajor ? j + max_idx : j; } } - + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index i, Index j) { + const Index PacketSize = packet_traits::size; + Scalar value = Comparator::predux(p); + const Packet range = preverse(plset(Scalar(1))); + /* mask will be zero for NaNs, so they will be ignored. */ + Packet mask = pcmp_eq(pset1(value), p); + Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); + this->res = value; + this->row = Derived::IsRowMajor ? i : i + max_idx; + this->col = Derived::IsRowMajor ? j + max_idx : j; + } }; // Propagate NaN. If the matrix contains NaN, the location of the first NaN will be returned in // row and col. 
template -struct minmax_coeff_visitor : coeff_visitor -{ +struct minmax_coeff_visitor : coeff_visitor { typedef typename Derived::Scalar Scalar; using Packet = typename packet_traits::type; using Comparator = minmax_compare; - EIGEN_DEVICE_FUNC inline - void operator() (const Scalar& value, Index i, Index j) - { + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index i, Index j) { const bool value_is_nan = (numext::isnan)(value); if ((value_is_nan && !(numext::isnan)(this->res)) || Comparator::compare(value, this->res)) { this->res = value; @@ -320,9 +546,7 @@ struct minmax_coeff_visitor : coeff_visitorcol = j; } } - - EIGEN_DEVICE_FUNC inline - void packet(const Packet& p, Index i, Index j) { + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index i, Index j) { const Index PacketSize = packet_traits::size; Scalar value = Comparator::predux(p); const bool value_is_nan = (numext::isnan)(value); @@ -332,10 +556,22 @@ struct minmax_coeff_visitor : coeff_visitor(value), p); Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); this->res = value; - this->row = Derived::IsRowMajor ? i : i + max_idx;; + this->row = Derived::IsRowMajor ? i : i + max_idx; this->col = Derived::IsRowMajor ? j + max_idx : j; } } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index i, Index j) { + const Index PacketSize = packet_traits::size; + Scalar value = Comparator::predux(p); + const bool value_is_nan = (numext::isnan)(value); + const Packet range = preverse(plset(Scalar(1))); + // If the value is NaN, pick the first position of a NaN, otherwise pick the first extremal value. + Packet mask = value_is_nan ? pnot(pcmp_eq(p, p)) : pcmp_eq(pset1(value), p); + Index max_idx = PacketSize - static_cast(predux_max(pand(range, mask))); + this->res = value; + this->row = Derived::IsRowMajor ? i : i + max_idx; + this->col = Derived::IsRowMajor ? 
j + max_idx : j; + } }; template @@ -343,10 +579,90 @@ struct functor_traits > { using Scalar = typename Derived::Scalar; enum { Cost = NumTraits::AddCost, + LinearAccess = false, PacketAccess = packet_traits::HasCmp }; }; +template +struct all_visitor { + using result_type = bool; + using Packet = typename packet_traits::type; + EIGEN_DEVICE_FUNC inline void init(const Scalar& value, Index, Index) { res = (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline void init(const Scalar& value, Index) { res = (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline bool all_predux(const Packet& p) const { return !predux_any(pcmp_eq(p, pzero(p))); } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index, Index) { res = all_predux(p); } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index) { res = all_predux(p); } + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index, Index) { res = res && (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index) { res = res && (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index, Index) { res = res && all_predux(p); } + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index) { res = res && all_predux(p); } + EIGEN_DEVICE_FUNC inline bool done() const { return !res; } + bool res = true; +}; +template +struct functor_traits> { + enum { Cost = NumTraits::ReadCost, LinearAccess = true, PacketAccess = packet_traits::HasCmp }; +}; + +template +struct any_visitor { + using result_type = bool; + using Packet = typename packet_traits::type; + EIGEN_DEVICE_FUNC inline void init(const Scalar& value, Index, Index) { res = (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline void init(const Scalar& value, Index) { res = (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline bool any_predux(const Packet& p) const { + return predux_any(pandnot(ptrue(p), pcmp_eq(p, pzero(p)))); + } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index, Index) 
{ res = any_predux(p); } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index) { res = any_predux(p); } + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index, Index) { res = res || (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index) { res = res || (value != Scalar(0)); } + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index, Index) { res = res || any_predux(p); } + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index) { res = res || any_predux(p); } + EIGEN_DEVICE_FUNC inline bool done() const { return res; } + bool res = false; +}; +template +struct functor_traits> { + enum { Cost = NumTraits::ReadCost, LinearAccess = true, PacketAccess = packet_traits::HasCmp }; +}; + +template +struct count_visitor { + using result_type = Index; + using Packet = typename packet_traits::type; + EIGEN_DEVICE_FUNC inline void init(const Scalar& value, Index, Index) { res = value != Scalar(0) ? 1 : 0; } + EIGEN_DEVICE_FUNC inline void init(const Scalar& value, Index) { res = value != Scalar(0) ? 
1 : 0; } + EIGEN_DEVICE_FUNC inline Index count_redux(const Packet& p) const { + const Packet cst_one = pset1(Scalar(1)); + Packet true_vals = pandnot(cst_one, pcmp_eq(p, pzero(p))); + Scalar num_true = predux(true_vals); + return static_cast(num_true); + } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index, Index) { res = count_redux(p); } + EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index) { res = count_redux(p); } + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index, Index) { + if (value != Scalar(0)) res++; + } + EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index) { + if (value != Scalar(0)) res++; + } + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index, Index) { res += count_redux(p); } + EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index) { res += count_redux(p); } + Index res = 0; +}; + +template +struct functor_traits> { + enum { + Cost = NumTraits::AddCost, + LinearAccess = true, + // predux is problematic for bool + PacketAccess = packet_traits::HasCmp && packet_traits::HasAdd && !is_same::value + }; +}; + } // end namespace internal /** \fn DenseBase::minCoeff(IndexType* rowId, IndexType* colId) const @@ -391,10 +707,10 @@ EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::minCoeff(IndexType* index) const { - eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); - + eigen_assert(this->rows() > 0 && this->cols() > 0 && "you are using an empty matrix"); EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - internal::minmax_coeff_visitor minVisitor; + + internal::minmax_coeff_visitor minVisitor; this->visit(minVisitor); *index = IndexType((RowsAtCompileTime==1) ? 
minVisitor.col : minVisitor.row); return minVisitor.res; @@ -445,12 +761,71 @@ DenseBase::maxCoeff(IndexType* index) const eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - internal::minmax_coeff_visitor maxVisitor; + internal::minmax_coeff_visitor maxVisitor; this->visit(maxVisitor); *index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row; return maxVisitor.res; } +/** \returns true if all coefficients are true + * + * Example: \include MatrixBase_all.cpp + * Output: \verbinclude MatrixBase_all.out + * + * \sa any(), Cwise::operator<() + */ +template +EIGEN_DEVICE_FUNC inline bool DenseBase::all() const { + using Visitor = internal::all_visitor; + using impl = internal::visit_impl; + Visitor visitor; + impl::run(derived(), visitor); + return visitor.res; +} + +/** \returns true if at least one coefficient is true + * + * \sa all() + */ +template +EIGEN_DEVICE_FUNC inline bool DenseBase::any() const { + using Visitor = internal::any_visitor; + using impl = internal::visit_impl; + Visitor visitor; + impl::run(derived(), visitor); + return visitor.res; +} + +/** \returns the number of coefficients which evaluate to true + * + * \sa all(), any() + */ +template +EIGEN_DEVICE_FUNC +Index DenseBase::count() const +{ + using Visitor = internal::count_visitor; + using impl = internal::visit_impl; + Visitor visitor; + impl::run(derived(), visitor); + return visitor.res; + +} + +template +EIGEN_DEVICE_FUNC inline bool DenseBase::hasNaN() const { + return derived().cwiseTypedNotEqual(derived()).any(); +} + +/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. 
+ * + * \sa hasNaN() + */ +template +EIGEN_DEVICE_FUNC inline bool DenseBase::allFinite() const { + return derived().cwiseAbs().cwiseTypedLesser(NumTraits::infinity()).all(); +} + } // end namespace Eigen #endif // EIGEN_VISITOR_H diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp index 9b8891dbe..0d0cb5f07 100644 --- a/test/array_cwise.cpp +++ b/test/array_cwise.cpp @@ -94,8 +94,8 @@ void binary_op_test(std::string name, Fn fun, RefFn ref) { } #define BINARY_FUNCTOR_TEST_ARGS(fun) #fun, \ - [](const auto& x, const auto& y) { return (Eigen::fun)(x, y); }, \ - [](const auto& x, const auto& y) { return (std::fun)(x, y); } + [](const auto& x_, const auto& y_) { return (Eigen::fun)(x_, y_); }, \ + [](const auto& x_, const auto& y_) { return (std::fun)(x_, y_); } template diff --git a/test/visitor.cpp b/test/visitor.cpp index bc34917e1..9586539a2 100644 --- a/test/visitor.cpp +++ b/test/visitor.cpp @@ -173,23 +173,36 @@ template void vectorVisitor(const VectorType& w) } } -template +template struct TrackedVisitor { - void init(T v, Index i, Index j) { return this->operator()(v,i,j); } - void operator()(T v, Index i, Index j) { + using Scalar = typename DenseBase::Scalar; + static constexpr int PacketSize = Eigen::internal::packet_traits::size; + static constexpr bool RowMajor = Derived::IsRowMajor; + + void init(Scalar v, Index i, Index j) { return this->operator()(v, i, j); } + template + void initpacket(Packet p, Index i, Index j) { + return this->packet(p, i, j); + } + void operator()(Scalar v, Index i, Index j) { EIGEN_UNUSED_VARIABLE(v) - visited.push_back({i, j}); - vectorized = false; + visited.emplace_back(i, j); + scalarOps++; } - - template + + template void packet(Packet p, Index i, Index j) { - EIGEN_UNUSED_VARIABLE(p) - visited.push_back({i, j}); - vectorized = true; + EIGEN_UNUSED_VARIABLE(p) + for (int k = 0; k < PacketSize; k++) + if (RowMajor) + visited.emplace_back(i, j + k); + else + visited.emplace_back(i + k, j); + vectorOps++; } - 
std::vector> visited; - bool vectorized; + std::vector> visited; + Index scalarOps = 0; + Index vectorOps = 0; }; namespace Eigen { @@ -197,129 +210,64 @@ namespace internal { template struct functor_traits > { - enum { PacketAccess = Vectorizable, Cost = 1 }; + enum { PacketAccess = Vectorizable, LinearAccess = false, Cost = 1 }; }; } // namespace internal } // namespace Eigen +template +void checkOptimalTraversal_impl(const DenseBase& mat) { + using Scalar = typename DenseBase::Scalar; + static constexpr int PacketSize = Eigen::internal::packet_traits::size; + static constexpr bool RowMajor = Derived::IsRowMajor; + Derived X(mat.rows(), mat.cols()); + X.setRandom(); + TrackedVisitor visitor; + visitor.visited.reserve(X.size()); + X.visit(visitor); + Index count = 0; + for (Index j = 0; j < X.outerSize(); ++j) { + for (Index i = 0; i < X.innerSize(); ++i) { + Index r = RowMajor ? j : i; + Index c = RowMajor ? i : j; + VERIFY_IS_EQUAL(visitor.visited[count].first, r); + VERIFY_IS_EQUAL(visitor.visited[count].second, c); + ++count; + } + } + Index vectorOps = Vectorized ? ((X.innerSize() / PacketSize) * X.outerSize()) : 0; + Index scalarOps = X.size() - (vectorOps * PacketSize); + VERIFY_IS_EQUAL(vectorOps, visitor.vectorOps); + VERIFY_IS_EQUAL(scalarOps, visitor.scalarOps); +} + void checkOptimalTraversal() { - - // Unrolled - ColMajor. - { - using MatrixType = Matrix; - MatrixType X = MatrixType::Random(4, 4); - TrackedVisitor visitor; - X.visit(visitor); - Index count = 0; - for (Index j=0; j; - MatrixType X = MatrixType::Random(4, 4); - TrackedVisitor visitor; - X.visit(visitor); - Index count = 0; - for (Index i=0; i; - MatrixType X = MatrixType::Random(4, 4); - TrackedVisitor visitor; - X.visit(visitor); - Index count = 0; - for (Index j=0; j; - MatrixType X = MatrixType::Random(4, 4); - TrackedVisitor visitor; - X.visit(visitor); - Index count = 0; - for (Index i=0; i; - // Ensure rows/cols is larger than packet size. 
- constexpr int PacketSize = Eigen::internal::packet_traits::size; - MatrixType X = MatrixType::Random(4 * PacketSize, 4 * PacketSize); - TrackedVisitor visitor; - X.visit(visitor); - Index previ = -1; - Index prevj = 0; - for (const auto& p : visitor.visited) { - Index i = p.first; - Index j = p.second; - VERIFY( - (j == prevj && i == previ + 1) // Advance single element - || (j == prevj && i == previ + PacketSize) // Advance packet - || (j == prevj + 1 && i == 0) // Advance column - ); - previ = i; - prevj = j; - } - if (Eigen::internal::packet_traits::Vectorizable) { - VERIFY(visitor.vectorized); - } - } - - // Vectorized - RowMajor. - { - using MatrixType = Matrix; - // Ensure rows/cols is larger than packet size. - constexpr int PacketSize = Eigen::internal::packet_traits::size; - MatrixType X = MatrixType::Random(4 * PacketSize, 4 * PacketSize); - TrackedVisitor visitor; - X.visit(visitor); - Index previ = 0; - Index prevj = -1; - for (const auto& p : visitor.visited) { - Index i = p.first; - Index j = p.second; - VERIFY( - (i == previ && j == prevj + 1) // Advance single element - || (i == previ && j == prevj + PacketSize) // Advance packet - || (i == previ + 1 && j == 0) // Advance row - ); - previ = i; - prevj = j; - } - if (Eigen::internal::packet_traits::Vectorizable) { - VERIFY(visitor.vectorized); - } - } + + using Scalar = float; + constexpr int PacketSize = Eigen::internal::packet_traits::size; + // use sizes that mix vector and scalar ops + constexpr int Rows = 3 * PacketSize + 1; + constexpr int Cols = 4 * PacketSize + 1; + int rows = internal::random(PacketSize + 1, EIGEN_TEST_MAX_SIZE); + int cols = internal::random(PacketSize + 1, EIGEN_TEST_MAX_SIZE); + + using UnrollColMajor = Matrix; + using UnrollRowMajor = Matrix; + using DynamicColMajor = Matrix; + using DynamicRowMajor = Matrix; + + // Scalar-only visitors + checkOptimalTraversal_impl(UnrollColMajor(Rows,Cols)); + checkOptimalTraversal_impl(UnrollRowMajor(Rows, Cols)); + 
checkOptimalTraversal_impl(DynamicColMajor(rows, cols)); + checkOptimalTraversal_impl(DynamicRowMajor(rows, cols)); + + // Vectorized visitors + checkOptimalTraversal_impl(UnrollColMajor(Rows, Cols)); + checkOptimalTraversal_impl(UnrollRowMajor(Rows, Cols)); + checkOptimalTraversal_impl(DynamicColMajor(rows, cols)); + checkOptimalTraversal_impl(DynamicRowMajor(rows, cols)); } EIGEN_DECLARE_TEST(visitor)