diff --git a/Eigen/Array b/Eigen/Array index 9595e7db5..51c3abe31 100644 --- a/Eigen/Array +++ b/Eigen/Array @@ -5,7 +5,6 @@ namespace Eigen { -#include "src/Array/ArrayBase.h" #include "src/Array/CwiseOperators.h" #include "src/Array/Functors.h" #include "src/Array/AllAndAny.h" diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h index 11bc36e16..f902d7bbe 100644 --- a/Eigen/src/Core/DiagonalProduct.h +++ b/Eigen/src/Core/DiagonalProduct.h @@ -26,48 +26,57 @@ #ifndef EIGEN_DIAGONALPRODUCT_H #define EIGEN_DIAGONALPRODUCT_H -template -struct ei_traits > +template +struct ei_traits > { - typedef typename Lhs::Scalar Scalar; - typedef typename ei_nested::type LhsNested; - typedef typename ei_nested::type RhsNested; - typedef typename ei_unref::type _LhsNested; - typedef typename ei_unref::type _RhsNested; + // clean the nested types: + typedef typename ei_unconst::type>::type _LhsNested; + typedef typename ei_unconst::type>::type _RhsNested; + typedef typename _LhsNested::Scalar Scalar; + enum { LhsFlags = _LhsNested::Flags, RhsFlags = _RhsNested::Flags, - RowsAtCompileTime = Lhs::RowsAtCompileTime, - ColsAtCompileTime = Rhs::ColsAtCompileTime, - MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, - _RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) + RowsAtCompileTime = _LhsNested::RowsAtCompileTime, + ColsAtCompileTime = _RhsNested::ColsAtCompileTime, + MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, + MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, + + LhsIsDiagonal = (_LhsNested::Flags&Diagonal)==Diagonal, + RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal, + + CanVectorizeRhs = (!RhsIsDiagonal) && (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits::size == 0), - _LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) + + CanVectorizeLhs = (!LhsIsDiagonal) && (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits::size == 0), - _LostBits = ~(((RhsFlags & RowMajorBit) && (!_LhsPacketAccess) ? 0 : RowMajorBit) - | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)), - Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits) - | (_LhsPacketAccess || _RhsPacketAccess ? PacketAccessBit : 0), + + RemovedBits = ~(((RhsFlags & RowMajorBit) && (!CanVectorizeLhs) ? 0 : RowMajorBit) + | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)) + | LinearAccessBit, + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) + | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0), + CoeffReadCost = NumTraits::MulCost + _LhsNested::CoeffReadCost + _RhsNested::CoeffReadCost }; }; -template class Product : ei_no_assignment_operator, - public MatrixBase > +template class Product : ei_no_assignment_operator, + public MatrixBase > { - public: - - EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - typedef typename ei_traits::LhsNested LhsNested; - typedef typename ei_traits::RhsNested RhsNested; typedef typename ei_traits::_LhsNested _LhsNested; typedef typename ei_traits::_RhsNested _RhsNested; enum { - PacketSize = ei_packet_traits::size + RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal }; + public: + + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + template inline Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { @@ -81,14 +90,14 @@ template class Product : const Scalar _coeff(int row, int col) const { - int unique = ((Rhs::Flags&Diagonal)==Diagonal) ? col : row; + const int unique = RhsIsDiagonal ? col : row; return m_lhs.coeff(row, unique) * m_rhs.coeff(unique, col); } template const PacketScalar _packet(int row, int col) const { - if ((Rhs::Flags&Diagonal)==Diagonal) + if (RhsIsDiagonal) { ei_assert((_LhsNested::Flags&RowMajorBit)==0); return ei_pmul(m_lhs.template packet(row, col), ei_pset1(m_rhs.coeff(col, col))); diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 754a1ec98..97c8a39d1 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -49,7 +49,7 @@ * * \nosubgrouping */ -template class MatrixBase : public ArrayBase +template class MatrixBase { struct CommaInitializer; @@ -168,16 +168,6 @@ template class MatrixBase : public ArrayBase }; /** Represents a product scalar-matrix */ typedef CwiseUnaryOp, Derived> ScalarMultipleReturnType; - /** */ - template - struct ProductReturnType - { - typedef typename ei_meta_if< - (Derived::Flags & OtherDerived::Flags & ArrayBit), - CwiseBinaryOp::Scalar>, Derived, OtherDerived>, - Product - >::ret Type; - }; /** the return type of MatrixBase::conjugate() */ typedef typename ei_meta_if::IsComplex, CwiseUnaryOp, Derived>, @@ -274,7 +264,7 @@ template class MatrixBase : public ArrayBase template - const typename ProductReturnType::Type + const typename ProductReturnType::Type operator*(const MatrixBase &other) const; template diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index def183b38..f03ea4e8e 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -26,123 +26,69 @@ #ifndef EIGEN_PRODUCT_H #define EIGEN_PRODUCT_H -template -struct ei_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) - { - ei_product_impl::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, Index) * rhs.coeff(Index, col); - } +/*************************** +*** Forward declarations *** +***************************/ + +enum { + ColMajorProduct, + RowMajorProduct }; -template -struct ei_product_impl<0, Size, Lhs, Rhs> -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, - typename Lhs::Scalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; +template +struct ei_product_coeff_impl; -template -struct ei_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(int i = 1; i < lhs.cols(); i++) - res += lhs.coeff(row, i) * rhs.coeff(i, col); - } -}; +template +struct ei_product_packet_impl; -// prevent buggy user code from causing an infinite recursion -template -struct ei_product_impl -{ - inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} -}; +template class ei_product_eval_to_column_major; -//---------- - -template -struct ei_packet_product_impl; - -template -struct ei_packet_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_packet_product_impl::run(row, col, lhs, rhs, res); - res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet(Index, col), res); - } -}; - -template -struct ei_packet_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - ei_packet_product_impl::run(row, col, lhs, rhs, res); - res = ei_pmadd(lhs.template packet(row, Index), ei_pset1(rhs.coeff(Index, col)), res); - } -}; - -template -struct ei_packet_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); - } -}; - -template -struct ei_packet_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) - { - res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); - } -}; - -template -struct ei_packet_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) - { - res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); - for(int i = 1; i < lhs.cols(); i++) - res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); - } -}; - -template -struct ei_packet_product_impl -{ - inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) - { - res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); - for(int i = 1; i < lhs.cols(); i++) - res = ei_pmadd(lhs.template packet(row, i), ei_pset1(rhs.coeff(i, col)), res); - } -}; - -/** \class Product +/** \class ProductReturnType * - * \brief Expression of the product of two matrices + * \brief Helper class to get the correct and optimized returned type of operator* * * \param Lhs the type of the left-hand side * \param Rhs the type of the right-hand side - * \param EvalMode internal use only + * \param ProductMode the type of the product (determined automatically by ei_product_mode) * - * This class represents an expression of the product of two matrices. - * It is the return type of the operator* between matrices, and most of the time - * this is the only way it is used. + * This class defines the typename Type representing the optimized product expression + * between two matrix expressions. In practice, using ProductReturnType::Type + * is the recommended way to define the result type of a function returning an expression + * which involve a matrix product. The class Product or DiagonalProduct should never be + * used directly. + * + * \sa class Product, class DiagonalProduct, MatrixBase::operator*(const MatrixBase&) */ -template struct ei_product_eval_mode +template +struct ProductReturnType +{ + typedef typename ei_nested::type LhsNested; + typedef typename ei_nested::type RhsNested; + + typedef Product::type, + typename ei_unconst::type, ProductMode> Type; +}; + +// cache friendly specialization +template +struct ProductReturnType +{ + typedef typename ei_nested::type LhsNested; + + typedef typename ei_nested::type + >::type RhsNested; + + typedef Product::type, + typename ei_unconst::type, CacheFriendlyProduct> Type; +}; + +/* Helper class to determine the type of the product, can be either: + * - NormalProduct + * - CacheFriendlyProduct + * - NormalProduct + */ +template struct ei_product_mode { enum{ value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal) ? DiagonalProduct @@ -152,100 +98,103 @@ template struct ei_product_eval_mode ? CacheFriendlyProduct : NormalProduct }; }; -template class ei_product_eval_to_column_major +/** \class Product + * + * \brief Expression of the product of two matrices + * + * \param LhsNested the type used to store the left-hand side + * \param RhsNested the type used to store the right-hand side + * \param ProductMode the type of the product + * + * This class represents an expression of the product of two matrices. + * It is the return type of the operator* between matrices. Its template + * arguments are determined automatically by ProductReturnType. Therefore, + * Product should be used direclty. To determine the result type of a function + * which involve a matrix product, use ProductReturnType::Type. + * + * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase&) + */ +template +struct ei_traits > { - typedef typename ei_traits::Scalar _Scalar; - enum { - _Rows = ei_traits::RowsAtCompileTime, - _Cols = ei_traits::ColsAtCompileTime, - _MaxRows = ei_traits::MaxRowsAtCompileTime, - _MaxCols = ei_traits::MaxColsAtCompileTime, - _Flags = ei_traits::Flags - }; - - public: - typedef Matrix<_Scalar, - _Rows, _Cols, _MaxRows, _MaxCols, - ei_corrected_matrix_flags< - _Scalar, - _Rows, _Cols, _MaxRows, _MaxCols, - _Flags - >::ret & ~RowMajorBit - > type; -}; - -// as ei_nested, but evaluate to a column-major matrix if an evaluation is required -template struct ei_product_nested_rhs -{ - typedef typename ei_meta_if< - ei_must_nest_by_value::ret, - T, - typename ei_meta_if< - ((ei_traits::Flags & EvalBeforeNestingBit) - || (n+1) * (NumTraits::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), - typename ei_product_eval_to_column_major::type, - const T& - >::ret - >::ret type; -}; - -template -struct ei_traits > -{ - typedef typename Lhs::Scalar Scalar; - typedef typename ei_nested::type LhsNested; - typedef typename ei_meta_if::type, - typename ei_nested::type>::ret RhsNested; + // clean the nested types: typedef typename ei_unconst::type>::type _LhsNested; typedef typename ei_unconst::type>::type _RhsNested; + typedef typename _LhsNested::Scalar Scalar; + enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, LhsFlags = _LhsNested::Flags, RhsFlags = _RhsNested::Flags, - RowsAtCompileTime = Lhs::RowsAtCompileTime, - ColsAtCompileTime = Rhs::ColsAtCompileTime, - MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, - // the vectorization flags are only used by the normal product, - // the other one is always vectorized ! - _RhsPacketAccess = (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime % ei_packet_traits::size == 0), - _LhsPacketAccess = (!(LhsFlags & RowMajorBit)) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime % ei_packet_traits::size == 0), - _PacketAccess = (_LhsPacketAccess || _RhsPacketAccess) ? 1 : 0, - _RowMajor = (RhsFlags & RowMajorBit) - && (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsPacketAccess)), - _LostBits = ~((_RowMajor ? 0 : RowMajorBit) + + RowsAtCompileTime = _LhsNested::RowsAtCompileTime, + ColsAtCompileTime = _RhsNested::ColsAtCompileTime, + InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), + + MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, + MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime % ei_packet_traits::size == 0), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime % ei_packet_traits::size == 0), + + CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & PacketAccessBit) && (RhsFlags & PacketAccessBit) + && (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits::size == 0), + + EvalToRowMajor = (RhsFlags & RowMajorBit) + && (ProductMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!CanVectorizeLhs)), + + RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit) | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit) | LinearAccessBit), - Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits) + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | EvalBeforeAssigningBit | EvalBeforeNestingBit - | (_PacketAccess ? PacketAccessBit : 0), - CoeffReadCost - = Lhs::ColsAtCompileTime == Dynamic - ? Dynamic - : Lhs::ColsAtCompileTime - * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) - + (Lhs::ColsAtCompileTime - 1) * NumTraits::AddCost + | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0), + + CoeffReadCost = InnerSize == Dynamic ? Dynamic + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost }; }; -template class Product : ei_no_assignment_operator, - public MatrixBase > +template class Product : ei_no_assignment_operator, + public MatrixBase > { public: EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - typedef typename ei_traits::LhsNested LhsNested; - typedef typename ei_traits::RhsNested RhsNested; + + private: + typedef typename ei_traits::_LhsNested _LhsNested; typedef typename ei_traits::_RhsNested _RhsNested; enum { - PacketSize = ei_packet_traits::size + PacketSize = ei_packet_traits::size, + InnerSize = ei_traits::InnerSize, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = ei_traits::CanVectorizeInner && Unroll }; + typedef ei_product_coeff_impl ScalarCoeffImpl; + + typedef ei_product_packet_impl PacketCoeffImpl; + + public: + + template inline Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { @@ -268,23 +217,15 @@ template class Product : ei_no_assignm const Scalar _coeff(int row, int col) const { Scalar res; - const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - ei_product_impl - ::run(row, col, m_lhs, m_rhs, res); + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } template const PacketScalar _packet(int row, int col) const { - const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; PacketScalar res; - ei_packet_product_impl - ::run(row, col, m_lhs, m_rhs, res); + PacketCoeffImpl::run(row, col, m_lhs, m_rhs, res); return res; } @@ -302,11 +243,11 @@ template class Product : ei_no_assignm */ template template -inline const typename MatrixBase::template ProductReturnType::Type +inline const typename ProductReturnType::Type MatrixBase::operator*(const MatrixBase &other) const { assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags&ArrayBit) ); - return typename ProductReturnType::Type(derived(), other.derived()); + return typename ProductReturnType::Type(derived(), other.derived()); } /** replaces \c *this by \c *this * \a other. @@ -321,6 +262,157 @@ MatrixBase::operator*=(const MatrixBase &other) return *this = *this * other; } +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template +struct ei_product_coeff_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + ei_product_coeff_impl::run(row, col, lhs, rhs, res); + res += lhs.coeff(row, Index) * rhs.coeff(Index, col); + } +}; + +template +struct ei_product_coeff_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template +struct ei_product_coeff_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(int i = 1; i < lhs.cols(); i++) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } +}; + +// prevent buggy user code from causing an infinite recursion +template +struct ei_product_coeff_impl +{ + inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {} +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template +struct ei_product_coeff_vectorized_impl +{ + enum { PacketSize = ei_packet_traits::size }; + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + ei_product_coeff_vectorized_impl::run(row, col, lhs, rhs, pres); + pres = ei_padd(pres, ei_pmul( lhs.template packet(row, Index) , rhs.template packet(Index, col) )); + } +}; + +template +struct ei_product_coeff_vectorized_impl<0, Lhs, Rhs, PacketScalar> +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + pres = ei_pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); + } +}; + +template +struct ei_product_coeff_impl +{ + typedef typename Lhs::PacketScalar PacketScalar; + enum { PacketSize = ei_packet_traits::size }; + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + PacketScalar pres; + ei_product_coeff_vectorized_impl::run(row, col, lhs, rhs, pres); + ei_product_coeff_impl::run(row, col, lhs, rhs, res); + res = ei_predux(pres); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template +struct ei_product_packet_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl::run(row, col, lhs, rhs, res); + res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet(Index, col), res); + } +}; + +template +struct ei_product_packet_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl::run(row, col, lhs, rhs, res); + res = ei_pmadd(lhs.template packet(row, Index), ei_pset1(rhs.coeff(Index, col)), res); + } +}; + +template +struct ei_product_packet_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + } +}; + +template +struct ei_product_packet_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); + } +}; + +template +struct ei_product_packet_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + for(int i = 1; i < lhs.cols(); i++) + res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct ei_product_packet_impl +{ + inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + res = ei_pmul(lhs.template packet(row, 0), ei_pset1(rhs.coeff(0, col))); + for(int i = 1; i < lhs.cols(); i++) + res = ei_pmadd(lhs.template packet(row, i), ei_pset1(rhs.coeff(i, col)), res); + } +}; + +/*************************************************************************** +* Cache friendly product callers and specific nested evaluation strategies +***************************************************************************/ + /** \internal */ template template @@ -339,6 +431,28 @@ inline Derived& MatrixBase::lazyAssign(const Product class ei_product_eval_to_column_major +{ + typedef typename ei_traits::Scalar _Scalar; + enum { + _Rows = ei_traits::RowsAtCompileTime, + _Cols = ei_traits::ColsAtCompileTime, + _MaxRows = ei_traits::MaxRowsAtCompileTime, + _MaxCols = ei_traits::MaxColsAtCompileTime, + _Flags = ei_traits::Flags + }; + + public: + typedef Matrix<_Scalar, + _Rows, _Cols, _MaxRows, _MaxCols, + ei_corrected_matrix_flags< + _Scalar, + _Rows, _Cols, _MaxRows, _MaxCols, + _Flags + >::ret & ~RowMajorBit + > type; +}; + template struct ei_product_copy_rhs { typedef typename ei_meta_if< diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 9cac3e984..0b65a57aa 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -140,7 +140,7 @@ enum { Aligned=0, UnAligned=1 }; enum { ConditionalJumpCost = 5 }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; enum DirectionType { Vertical, Horizontal }; -enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct, LazyProduct}; +enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct }; #endif // EIGEN_CONSTANTS_H diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 6a3810df6..268b24db0 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -26,7 +26,6 @@ #define EIGEN_FORWARDDECLARATIONS_H template struct ei_traits; -template struct ei_product_eval_mode; template struct NumTraits; template class ei_corrected_matrix_flags; @@ -49,7 +48,7 @@ template class Conjugate; template class CwiseNullaryOp; template class CwiseUnaryOp; template class CwiseBinaryOp; -template::value> class Product; +template class Product; template class DiagonalMatrix; template class DiagonalCoeffs; template class Map; @@ -63,6 +62,8 @@ template class Rotation2D; template class AngleAxis; template class Transform; +template struct ei_product_mode; +template::value> struct ProductReturnType; template struct ei_scalar_sum_op; template struct ei_scalar_difference_op; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 33a09f87c..509b72cc0 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -160,10 +160,7 @@ class ei_corrected_matrix_flags packet_access_bit = ei_packet_traits::size > 1 && (is_big || inner_size%ei_packet_traits::size==0) - ? PacketAccessBit : 0, - - _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit)) - | LinearAccessBit | DirectAccessBit + ? PacketAccessBit : 0 }; public: @@ -208,7 +205,7 @@ template struct ei_must_nest_by_value { enum { ret = false }; }; template struct ei_must_nest_by_value > { enum { ret = true }; }; -template struct ei_nested +template::type> struct ei_nested { typedef typename ei_meta_if< ei_must_nest_by_value::ret, @@ -216,7 +213,7 @@ template struct ei_nested typename ei_meta_if< (int(ei_traits::Flags) & EvalBeforeNestingBit) || ((n+1) * int(NumTraits::Scalar>::ReadCost) <= (n-1) * int(T::CoeffReadCost)), - typename ei_eval::type, + EvalType, const T& >::ret >::ret type; diff --git a/Eigen/src/Geometry/Rotation.h b/Eigen/src/Geometry/Rotation.h index bff63c40e..7e07b48f5 100644 --- a/Eigen/src/Geometry/Rotation.h +++ b/Eigen/src/Geometry/Rotation.h @@ -107,10 +107,10 @@ struct ToRotationMatrix > * * \param _Scalar the scalar type, i.e., the type of the coefficients * - * This class is equivalent to a single scalar representating the rotation angle + * This class is equivalent to a single scalar representing the rotation angle * in radian with some additional features such as the conversion from/to * rotation matrix. Moreover this class aims to provide a similar interface - * to Quaternion in order to facilitate the writting of generic algorithm + * to Quaternion in order to facilitate the writing of generic algorithm * dealing with rotations. * * \sa class Quaternion, class Transform diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index cdc24e772..b5c5b3a0d 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -103,17 +103,17 @@ public: inline VectorRef translation() { return m_matrix.template block(0,Dim); } template - struct ProductReturnType + struct TransformProductReturnType { typedef typename ei_transform_product_impl::ResultType Type; }; template - const typename ProductReturnType::Type + const typename TransformProductReturnType::Type operator * (const MatrixBase &other) const; /** Contatenates two transformations */ - const Product + const typename ProductReturnType::Type operator * (const Transform& other) const { return m_matrix * other.matrix(); } @@ -192,7 +192,7 @@ QMatrix Transform::toQMatrix(void) const template template -const typename Transform::template ProductReturnType::Type +const typename Transform::template TransformProductReturnType::Type Transform::operator*(const MatrixBase &other) const { return ei_transform_product_impl::run(*this,other.derived()); @@ -380,7 +380,7 @@ template struct Transform::ei_transform_product_impl { typedef typename Transform::MatrixType MatrixType; - typedef Product ResultType; + typedef typename ProductReturnType::Type ResultType; static ResultType run(const Transform& tr, const Other& other) { return tr.matrix() * other; } }; @@ -390,7 +390,7 @@ template struct Transform::ei_transform_product_impl { typedef typename Transform::MatrixType MatrixType; - typedef Product ResultType; + typedef typename ProductReturnType::Type ResultType; static ResultType run(const Transform& tr, const Other& other) { return tr.matrix() * other; } }; @@ -404,7 +404,7 @@ struct Transform::ei_transform_product_impl ei_scalar_multiple_op, NestByValue, - NestByValue,Other> >, + NestByValue,Other>::Type >, NestByValue::VectorRef> > > > ResultType; // FIXME shall we offer an optimized version when the last row is know to be 0,0...,0,1 ? diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp index c44ed4514..25101270e 100644 --- a/bench/basicbenchmark.cpp +++ b/bench/basicbenchmark.cpp @@ -4,7 +4,7 @@ int main(int argc, char *argv[]) { - // disbale floating point exceptions + // disable floating point exceptions // this leads to more stable bench results // (this is done by default by ICC) #ifndef __INTEL_COMPILER diff --git a/Eigen/src/Array/ArrayBase.h b/disabled/ArrayBase.h similarity index 100% rename from Eigen/src/Array/ArrayBase.h rename to disabled/ArrayBase.h diff --git a/test/product.cpp b/test/product.cpp index a89497763..f1e26d20a 100644 --- a/test/product.cpp +++ b/test/product.cpp @@ -61,7 +61,7 @@ template void product(const MatrixType& m) // (we use Transpose.h but this doesn't count as a test for it) VERIFY_IS_APPROX((m1*m1.transpose())*m2, m1*(m1.transpose()*m2)); m3 = m1; - m3 *= (m1.transpose() * m2); + m3 *= m1.transpose() * m2; VERIFY_IS_APPROX(m3, m1 * (m1.transpose()*m2)); VERIFY_IS_APPROX(m3, m1.lazy() * (m1.transpose()*m2)); @@ -91,6 +91,8 @@ void test_product() CALL_SUBTEST( product(Matrix3i()) ); CALL_SUBTEST( product(Matrix()) ); CALL_SUBTEST( product(Matrix4d()) ); + CALL_SUBTEST( product(Matrix4f()) ); + CALL_SUBTEST( product(MatrixXf(3,5)) ); } for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST( product(MatrixXf(ei_random(1,320), ei_random(1,320))) );