mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
* find the proper way of nesting the expression in Flagged:
finally that's more subtle than just using ei_nested, because when flagging with NestByValueBit we want to store the expression by value already, regardless of whether it already had the NestByValueBit set. * rename temporary() ----> nestByValue() * move the old Product.h to disabled/, replace by what was ProductWIP.h * tweak -O and -g flags for tests and examples * reorder the tests -- basic things go first * simplifications, e.g. in many methods return derived() and count on implicit casting to the actual return type. * strip some not-really-useful stuff from the heaviest tests
This commit is contained in:
@@ -118,7 +118,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<Derived>::Scalar>,Derived>
|
||||
MatrixBase<Derived>::operator-() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise absolute value of \c *this
|
||||
@@ -127,7 +127,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_abs_op<typename ei_traits<Derived>::Scalar>,Derived>
|
||||
MatrixBase<Derived>::cwiseAbs() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_abs_op<Scalar>,Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise squared absolute value of \c *this
|
||||
@@ -136,7 +136,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_abs2_op<typename ei_traits<Derived>::Scalar>,Derived>
|
||||
MatrixBase<Derived>::cwiseAbs2() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_abs2_op<Scalar>,Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the complex conjugate of *this.
|
||||
@@ -146,7 +146,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_conjugate_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::conjugate() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of *this with the \a Scalar type casted to
|
||||
@@ -161,7 +161,7 @@ template<typename NewType>
|
||||
inline const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
|
||||
MatrixBase<Derived>::cast() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_cast_op<Scalar, NewType>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \relates MatrixBase */
|
||||
@@ -201,7 +201,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_sqrt_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::cwiseSqrt() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_sqrt_op<Scalar>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise exponential of *this. */
|
||||
@@ -209,7 +209,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_exp_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::cwiseExp() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_exp_op<Scalar>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise logarithm of *this. */
|
||||
@@ -217,7 +217,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_log_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::cwiseLog() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_log_op<Scalar>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise cosine of *this. */
|
||||
@@ -225,7 +225,7 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_cos_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::cwiseCos() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_cos_op<Scalar>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise sine of *this. */
|
||||
@@ -233,10 +233,10 @@ template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_sin_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::cwiseSin() const
|
||||
{
|
||||
return CwiseUnaryOp<ei_scalar_sin_op<Scalar>, Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \relates MatrixBase */
|
||||
/** \returns an expression of the coefficient-wise power of *this to the given exponent. */
|
||||
template<typename Derived>
|
||||
inline const CwiseUnaryOp<ei_scalar_pow_op<typename ei_traits<Derived>::Scalar>, Derived>
|
||||
MatrixBase<Derived>::cwisePow(const Scalar& exponent) const
|
||||
|
||||
@@ -95,7 +95,7 @@ template<typename Derived>
|
||||
inline const DiagonalMatrix<Derived>
|
||||
MatrixBase<Derived>::asDiagonal() const
|
||||
{
|
||||
return DiagonalMatrix<Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately equal to a diagonal matrix,
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
* \param Added the flags added to the expression
|
||||
* \param Removed the flags removed from the expression (has priority over Added).
|
||||
*
|
||||
* This class represents an expression whose flags have been modified
|
||||
* This class represents an expression whose flags have been modified.
|
||||
* It is the return type of MatrixBase::flagged()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
@@ -94,7 +94,11 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
|
||||
}
|
||||
|
||||
protected:
|
||||
const ExpressionType m_matrix;
|
||||
const typename ei_meta_if<
|
||||
Added & ~Removed & NestByValueBit,
|
||||
ExpressionType,
|
||||
typename ExpressionType::Nested
|
||||
>::ret m_matrix;
|
||||
};
|
||||
|
||||
/** \returns an expression of *this with added flags
|
||||
@@ -121,7 +125,7 @@ MatrixBase<Derived>::lazy() const
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const Flagged<Derived, NestByValueBit, 0>
|
||||
MatrixBase<Derived>::temporary() const
|
||||
MatrixBase<Derived>::nestByValue() const
|
||||
{
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -452,7 +452,7 @@ template<typename Derived> class MatrixBase
|
||||
template<unsigned int Added>
|
||||
const Flagged<Derived, Added, 0> marked() const;
|
||||
const Flagged<Derived, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit> lazy() const;
|
||||
const Flagged<Derived, NestByValueBit, 0> temporary() const;
|
||||
const Flagged<Derived, NestByValueBit, 0> nestByValue() const;
|
||||
|
||||
/** \returns number of elements to skip to pass from one row (resp. column) to another
|
||||
* for a row-major (resp. column-major) matrix.
|
||||
|
||||
@@ -41,7 +41,7 @@ template<int Size, typename Lhs, typename Rhs>
|
||||
struct ei_product_unroller<0, Size, Lhs, Rhs>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
|
||||
typename Lhs::Scalar &res)
|
||||
typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
}
|
||||
@@ -60,12 +60,6 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
|
||||
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ei_product_unroller<0, Dynamic, Lhs, Rhs>
|
||||
{
|
||||
static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
|
||||
};
|
||||
|
||||
template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller;
|
||||
|
||||
@@ -119,12 +113,6 @@ struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
|
||||
inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller<false, 0, Dynamic, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
|
||||
};
|
||||
|
||||
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
|
||||
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
|
||||
{ return product._packetCoeffRowMajor(row,col); }
|
||||
@@ -153,18 +141,74 @@ template<typename Lhs, typename Rhs> struct ei_product_eval_mode
|
||||
{
|
||||
enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& (!( (Lhs::Flags&RowMajorBit) && ((Rhs::Flags&RowMajorBit) ^ RowMajorBit)))
|
||||
&& Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
? CacheFriendlyProduct : NormalProduct };
|
||||
};
|
||||
|
||||
template<typename T> class ei_product_eval_to_column_major
|
||||
{
|
||||
typedef typename ei_traits<T>::Scalar _Scalar;
|
||||
enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
|
||||
_MaxCols = ei_traits<T>::MaxColsAtCompileTime,
|
||||
_Flags = ei_traits<T>::Flags
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Matrix<_Scalar,
|
||||
ei_traits<T>::RowsAtCompileTime,
|
||||
ei_traits<T>::ColsAtCompileTime,
|
||||
ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit,
|
||||
ei_traits<T>::MaxRowsAtCompileTime,
|
||||
ei_traits<T>::MaxColsAtCompileTime> type;
|
||||
};
|
||||
|
||||
template<typename T, int n=1> struct ei_product_nested_rhs
|
||||
{
|
||||
typedef typename ei_meta_if<
|
||||
(ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit),
|
||||
T,
|
||||
typename ei_meta_if<
|
||||
((ei_traits<T>::Flags & EvalBeforeNestingBit)
|
||||
|| (ei_traits<T>::Flags & RowMajorBit)
|
||||
|| (!(ei_traits<T>::Flags & DirectAccessBit))
|
||||
|| (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
|
||||
typename ei_product_eval_to_column_major<T>::type,
|
||||
const T&
|
||||
>::ret
|
||||
>::ret type;
|
||||
};
|
||||
|
||||
template<typename T, int n=1> struct ei_product_nested_lhs
|
||||
{
|
||||
typedef typename ei_meta_if<
|
||||
ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit),
|
||||
T,
|
||||
typename ei_meta_if<
|
||||
int(ei_traits<T>::Flags) & EvalBeforeNestingBit
|
||||
|| (!(int(ei_traits<T>::Flags) & DirectAccessBit))
|
||||
|| (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
|
||||
typename ei_eval<T>::type,
|
||||
const T&
|
||||
>::ret
|
||||
>::ret type;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
{
|
||||
typedef typename Lhs::Scalar Scalar;
|
||||
typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
|
||||
typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
|
||||
typedef typename ei_unref<LhsNested>::type _LhsNested;
|
||||
typedef typename ei_unref<RhsNested>::type _RhsNested;
|
||||
// the cache friendly product evals lhs once only
|
||||
// FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ?
|
||||
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
|
||||
typename ei_product_nested_lhs<Lhs,1>::type,
|
||||
typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested;
|
||||
|
||||
// NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation
|
||||
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
|
||||
typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
|
||||
typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
|
||||
typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
|
||||
typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
|
||||
enum {
|
||||
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
|
||||
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
|
||||
@@ -174,6 +218,8 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
ColsAtCompileTime = Rhs::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
||||
// the vectorization flags are only used by the normal product,
|
||||
// the other one is always vectorized !
|
||||
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
|
||||
@@ -207,6 +253,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
|
||||
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
|
||||
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size
|
||||
};
|
||||
|
||||
inline Product(const Lhs& lhs, const Rhs& rhs)
|
||||
: m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
@@ -214,12 +264,12 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename DestDerived, int AlignedMode>
|
||||
void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
template<typename DestDerived, int AlignedMode>
|
||||
void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
|
||||
#endif
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEval(DestDerived& res) const;
|
||||
|
||||
/** \internal */
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
|
||||
|
||||
private:
|
||||
|
||||
@@ -252,7 +302,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
||||
{
|
||||
PacketScalar res;
|
||||
ei_packet_product_unroller<Flags&RowMajorBit, Lhs::ColsAtCompileTime-1,
|
||||
ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
|
||||
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
|
||||
? Lhs::ColsAtCompileTime : Dynamic,
|
||||
_LhsNested, _RhsNested, PacketScalar>
|
||||
@@ -279,16 +329,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
for(int i = 1; i < m_lhs.cols(); i++)
|
||||
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
|
||||
return res;
|
||||
// const PacketScalar tmp[4];
|
||||
// ei_punpack(m_rhs.packetCoeff(0,col), tmp);
|
||||
//
|
||||
// return
|
||||
// ei_pmadd(m_lhs.packetCoeff(row, 0), tmp[0],
|
||||
// ei_pmadd(m_lhs.packetCoeff(row, 1), tmp[1],
|
||||
// ei_pmadd(m_lhs.packetCoeff(row, 2), tmp[2]
|
||||
// ei_pmul(m_lhs.packetCoeff(row, 3), tmp[3]))));
|
||||
}
|
||||
|
||||
template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_>
|
||||
friend struct ei_cache_friendly_selector;
|
||||
|
||||
protected:
|
||||
const LhsNested m_lhs;
|
||||
@@ -296,9 +340,6 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
};
|
||||
|
||||
/** \returns the matrix product of \c *this and \a other.
|
||||
*
|
||||
* \note This function causes an immediate evaluation. If you want to perform a matrix product
|
||||
* without immediate evaluation, call .lazy() on one of the matrices before taking the product.
|
||||
*
|
||||
* \sa lazy(), operator*=(const MatrixBase&)
|
||||
*/
|
||||
@@ -322,168 +363,107 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
|
||||
return *this = *this * other;
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename Derived>
|
||||
template<typename Lhs,typename Rhs>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
|
||||
{
|
||||
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename Lhs, typename Rhs>
|
||||
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
|
||||
{
|
||||
product.template _cacheOptimalEval<Derived, Aligned>(derived(),
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
typename ei_meta_if<Flags & VectorizableBit, ei_meta_true, ei_meta_false>::ret()
|
||||
#else
|
||||
ei_meta_false()
|
||||
#endif
|
||||
);
|
||||
product._cacheFriendlyEval(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived, int AlignedMode>
|
||||
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
|
||||
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess>
|
||||
struct ei_cache_friendly_selector
|
||||
{
|
||||
res.setZero();
|
||||
const int cols4 = m_lhs.cols() & 0xfffffffC;
|
||||
if (Lhs::Flags&RowMajorBit)
|
||||
typedef Product<Lhs,Rhs,EvalMode> Prod;
|
||||
typedef typename Prod::_LhsNested _LhsNested;
|
||||
typedef typename Prod::_RhsNested _RhsNested;
|
||||
typedef typename Prod::Scalar Scalar;
|
||||
static inline void eval(const Prod& product, DestDerived& res)
|
||||
{
|
||||
// std::cout << "opt rhs\n";
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
for(int k=0; k<this->rows(); ++k)
|
||||
{
|
||||
const Scalar tmp0 = m_lhs.coeff(k,j );
|
||||
const Scalar tmp1 = m_lhs.coeff(k,j+1);
|
||||
const Scalar tmp2 = m_lhs.coeff(k,j+2);
|
||||
const Scalar tmp3 = m_lhs.coeff(k,j+3);
|
||||
for (int i=0; i<this->cols(); ++i)
|
||||
res.coeffRef(k,i) += tmp0 * m_rhs.coeff(j+0,i) + tmp1 * m_rhs.coeff(j+1,i)
|
||||
+ tmp2 * m_rhs.coeff(j+2,i) + tmp3 * m_rhs.coeff(j+3,i);
|
||||
}
|
||||
res.setZero();
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
product._rows(), product._cols(), product.m_lhs.cols(),
|
||||
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
|
||||
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
|
||||
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
else
|
||||
{
|
||||
for(int k=0; k<this->rows(); ++k)
|
||||
{
|
||||
const Scalar tmp = m_rhs.coeff(k,j);
|
||||
for (int i=0; i<this->cols(); ++i)
|
||||
res.coeffRef(k,i) += tmp * m_lhs.coeff(j,i);
|
||||
}
|
||||
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
static inline void eval_and_add(const Prod& product, DestDerived& res)
|
||||
{
|
||||
// std::cout << "opt lhs\n";
|
||||
int j = 0;
|
||||
for(; j<cols4; j+=4)
|
||||
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
for(int k=0; k<this->cols(); ++k)
|
||||
{
|
||||
const Scalar tmp0 = m_rhs.coeff(j ,k);
|
||||
const Scalar tmp1 = m_rhs.coeff(j+1,k);
|
||||
const Scalar tmp2 = m_rhs.coeff(j+2,k);
|
||||
const Scalar tmp3 = m_rhs.coeff(j+3,k);
|
||||
for (int i=0; i<this->rows(); ++i)
|
||||
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j+0) + tmp1 * m_lhs.coeff(i,j+1)
|
||||
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
|
||||
}
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
product._rows(), product._cols(), product.m_lhs.cols(),
|
||||
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
|
||||
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
|
||||
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
else
|
||||
{
|
||||
for(int k=0; k<this->cols(); ++k)
|
||||
{
|
||||
const Scalar tmp = m_rhs.coeff(j,k);
|
||||
for (int i=0; i<this->rows(); ++i)
|
||||
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
|
||||
}
|
||||
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived>
|
||||
struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,EvalMode> Prod;
|
||||
typedef typename Prod::_LhsNested _LhsNested;
|
||||
typedef typename Prod::_RhsNested _RhsNested;
|
||||
typedef typename Prod::Scalar Scalar;
|
||||
static inline void eval(const Prod& product, DestDerived& res)
|
||||
{
|
||||
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
|
||||
static inline void eval_and_add(const Prod& product, DestDerived& res)
|
||||
{
|
||||
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
|
||||
{
|
||||
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
|
||||
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
|
||||
::eval(*this, res);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived, int AlignedMode>
|
||||
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
|
||||
{
|
||||
|
||||
if (((Lhs::Flags&RowMajorBit) && (_cols() % ei_packet_traits<Scalar>::size != 0))
|
||||
|| (_rows() % ei_packet_traits<Scalar>::size != 0))
|
||||
{
|
||||
return _cacheOptimalEval<DestDerived, AlignedMode>(res, ei_meta_false());
|
||||
}
|
||||
|
||||
res.setZero();
|
||||
const int cols4 = m_lhs.cols() & 0xfffffffC;
|
||||
if (Lhs::Flags&RowMajorBit)
|
||||
{
|
||||
// std::cout << "packet rhs\n";
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
{
|
||||
for(int k=0; k<this->rows(); k++)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_lhs.coeff(k,j+0));
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_lhs.coeff(k,j+1));
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_lhs.coeff(k,j+2));
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_lhs.coeff(k,j+3));
|
||||
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.template writePacketCoeff<AlignedMode>(k,i,
|
||||
ei_pmadd(tmp0, m_rhs.template packetCoeff<AlignedMode>(j+0,i),
|
||||
ei_pmadd(tmp1, m_rhs.template packetCoeff<AlignedMode>(j+1,i),
|
||||
ei_pmadd(tmp2, m_rhs.template packetCoeff<AlignedMode>(j+2,i),
|
||||
ei_pmadd(tmp3, m_rhs.template packetCoeff<AlignedMode>(j+3,i),
|
||||
res.template packetCoeff<AlignedMode>(k,i)))))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
{
|
||||
for(int k=0; k<this->rows(); k++)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_lhs.coeff(k,j));
|
||||
for (int i=0; i<this->cols(); i+=ei_packet_traits<Scalar>::size)
|
||||
res.template writePacketCoeff<AlignedMode>(k,i,
|
||||
ei_pmadd(tmp, m_rhs.template packetCoeff<AlignedMode>(j,i), res.template packetCoeff<AlignedMode>(k,i)));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// std::cout << "packet lhs\n";
|
||||
int k=0;
|
||||
for(; k<cols4; k+=4)
|
||||
{
|
||||
for(int j=0; j<this->cols(); j+=1)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(k+0,j));
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(k+1,j));
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(k+2,j));
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(k+3,j));
|
||||
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.template writePacketCoeff<AlignedMode>(i,j,
|
||||
ei_pmadd(tmp0, m_lhs.template packetCoeff<AlignedMode>(i,k),
|
||||
ei_pmadd(tmp1, m_lhs.template packetCoeff<AlignedMode>(i,k+1),
|
||||
ei_pmadd(tmp2, m_lhs.template packetCoeff<AlignedMode>(i,k+2),
|
||||
ei_pmadd(tmp3, m_lhs.template packetCoeff<AlignedMode>(i,k+3),
|
||||
res.template packetCoeff<AlignedMode>(i,j)))))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(; k<m_lhs.cols(); ++k)
|
||||
{
|
||||
for(int j=0; j<this->cols(); j++)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(k,j));
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
res.template writePacketCoeff<AlignedMode>(k,j,
|
||||
ei_pmadd(tmp, m_lhs.template packetCoeff<AlignedMode>(i,k), res.template packetCoeff<AlignedMode>(i,j)));
|
||||
}
|
||||
}
|
||||
}
|
||||
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
|
||||
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
|
||||
::eval_and_add(*this, res);
|
||||
}
|
||||
#endif // EIGEN_VECTORIZE
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
|
||||
@@ -1,471 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra. Eigen itself is part of the KDE project.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_PRODUCT_H
|
||||
#define EIGEN_PRODUCT_H
|
||||
|
||||
#include "CacheFriendlyProduct.h"
|
||||
|
||||
template<int Index, int Size, typename Lhs, typename Rhs>
|
||||
struct ei_product_unroller
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
|
||||
typename Lhs::Scalar &res)
|
||||
{
|
||||
ei_product_unroller<Index-1, Size, Lhs, Rhs>::run(row, col, lhs, rhs, res);
|
||||
res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Size, typename Lhs, typename Rhs>
|
||||
struct ei_product_unroller<0, Size, Lhs, Rhs>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs,
|
||||
typename Lhs::Scalar &res)
|
||||
{
|
||||
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Index, typename Lhs, typename Rhs>
|
||||
struct ei_product_unroller<Index, Dynamic, Lhs, Rhs>
|
||||
{
|
||||
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
|
||||
};
|
||||
|
||||
// prevent buggy user code from causing an infinite recursion
|
||||
template<int Index, typename Lhs, typename Rhs>
|
||||
struct ei_product_unroller<Index, 0, Lhs, Rhs>
|
||||
{
|
||||
inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
|
||||
};
|
||||
|
||||
template<bool RowMajor, int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller;
|
||||
|
||||
template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller<true, Index, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
ei_packet_product_unroller<true, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packetCoeff<Aligned>(Index, col), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Index, int Size, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller<false, Index, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
ei_packet_product_unroller<false, Index-1, Size, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, res);
|
||||
res = ei_pmadd(lhs.template packetCoeff<Aligned>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Size, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller<true, 0, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packetCoeff<Aligned>(0, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<int Size, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller<false, 0, Size, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
|
||||
{
|
||||
res = ei_pmul(lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(rhs.coeff(0, col)));
|
||||
}
|
||||
};
|
||||
|
||||
template<bool RowMajor, int Index, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller<RowMajor, Index, Dynamic, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
|
||||
};
|
||||
|
||||
template<int Index, typename Lhs, typename Rhs, typename PacketScalar>
|
||||
struct ei_packet_product_unroller<false, Index, Dynamic, Lhs, Rhs, PacketScalar>
|
||||
{
|
||||
inline static void run(int, int, const Lhs&, const Rhs&, PacketScalar&) {}
|
||||
};
|
||||
|
||||
template<typename Product, bool RowMajor = true> struct ProductPacketCoeffImpl {
|
||||
inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
|
||||
{ return product._packetCoeffRowMajor(row,col); }
|
||||
};
|
||||
|
||||
// RowMajor == false: forwards to the product's _packetCoeffColumnMajor(row, col).
template<typename Product>
struct ProductPacketCoeffImpl<Product, false>
{
  inline static typename Product::PacketScalar execute(const Product& product, int row, int col)
  {
    return product._packetCoeffColumnMajor(row, col);
  }
};
|
||||
|
||||
/** \class Product
|
||||
*
|
||||
* \brief Expression of the product of two matrices
|
||||
*
|
||||
* \param Lhs the type of the left-hand side
|
||||
* \param Rhs the type of the right-hand side
|
||||
* \param EvalMode internal use only
|
||||
*
|
||||
* This class represents an expression of the product of two matrices.
|
||||
* It is the return type of the operator* between matrices, and most of the time
|
||||
* this is the only way it is used.
|
||||
*
|
||||
* \sa class Sum, class Difference
|
||||
*/
|
||||
template<typename Lhs, typename Rhs> struct ei_product_eval_mode
|
||||
{
|
||||
enum{ value = Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
? CacheFriendlyProduct : NormalProduct };
|
||||
};
|
||||
|
||||
template<typename T> class ei_product_eval_to_column_major
|
||||
{
|
||||
typedef typename ei_traits<T>::Scalar _Scalar;
|
||||
enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
|
||||
_MaxCols = ei_traits<T>::MaxColsAtCompileTime,
|
||||
_Flags = ei_traits<T>::Flags
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Matrix<_Scalar,
|
||||
ei_traits<T>::RowsAtCompileTime,
|
||||
ei_traits<T>::ColsAtCompileTime,
|
||||
ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret & ~RowMajorBit,
|
||||
ei_traits<T>::MaxRowsAtCompileTime,
|
||||
ei_traits<T>::MaxColsAtCompileTime> type;
|
||||
};
|
||||
|
||||
template<typename T, int n=1> struct ei_product_nested_rhs
|
||||
{
|
||||
typedef typename ei_meta_if<
|
||||
(ei_traits<T>::Flags & NestByValueBit) && (!(ei_traits<T>::Flags & RowMajorBit)) && (int(ei_traits<T>::Flags) & DirectAccessBit),
|
||||
T,
|
||||
typename ei_meta_if<
|
||||
((ei_traits<T>::Flags & EvalBeforeNestingBit)
|
||||
|| (ei_traits<T>::Flags & RowMajorBit)
|
||||
|| (!(ei_traits<T>::Flags & DirectAccessBit))
|
||||
|| (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
|
||||
typename ei_product_eval_to_column_major<T>::type,
|
||||
const T&
|
||||
>::ret
|
||||
>::ret type;
|
||||
};
|
||||
|
||||
template<typename T, int n=1> struct ei_product_nested_lhs
|
||||
{
|
||||
typedef typename ei_meta_if<
|
||||
ei_traits<T>::Flags & NestByValueBit && (int(ei_traits<T>::Flags) & DirectAccessBit),
|
||||
T,
|
||||
typename ei_meta_if<
|
||||
int(ei_traits<T>::Flags) & EvalBeforeNestingBit
|
||||
|| (!(int(ei_traits<T>::Flags) & DirectAccessBit))
|
||||
|| (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
|
||||
typename ei_eval<T>::type,
|
||||
const T&
|
||||
>::ret
|
||||
>::ret type;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
{
|
||||
typedef typename Lhs::Scalar Scalar;
|
||||
// the cache friendly product evals lhs once only
|
||||
// FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ?
|
||||
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
|
||||
typename ei_product_nested_lhs<Lhs,1>::type,
|
||||
typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type>::ret LhsNested;
|
||||
|
||||
// NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation
|
||||
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
|
||||
typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
|
||||
typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
|
||||
typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
|
||||
typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
|
||||
enum {
|
||||
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
|
||||
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
|
||||
LhsFlags = _LhsNested::Flags,
|
||||
RhsFlags = _RhsNested::Flags,
|
||||
RowsAtCompileTime = Lhs::RowsAtCompileTime,
|
||||
ColsAtCompileTime = Rhs::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
||||
// the vectorization flags are only used by the normal product,
|
||||
// the other one is always vectorized !
|
||||
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
|
||||
_RowMajor = (RhsFlags & RowMajorBit)
|
||||
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
|
||||
_LostBits = HereditaryBits & ~(
|
||||
(_RowMajor ? 0 : RowMajorBit)
|
||||
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
|
||||
Flags = ((unsigned int)(LhsFlags | RhsFlags) & _LostBits)
|
||||
| EvalBeforeAssigningBit
|
||||
| EvalBeforeNestingBit
|
||||
| (_Vectorizable ? VectorizableBit : 0),
|
||||
CoeffReadCost
|
||||
= Lhs::ColsAtCompileTime == Dynamic
|
||||
? Dynamic
|
||||
: Lhs::ColsAtCompileTime
|
||||
* (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||
+ (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignment_operator,
|
||||
public MatrixBase<Product<Lhs, Rhs, EvalMode> >
|
||||
{
|
||||
public:
|
||||
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
friend class ProductPacketCoeffImpl<Product,Flags&RowMajorBit>;
|
||||
typedef typename ei_traits<Product>::LhsNested LhsNested;
|
||||
typedef typename ei_traits<Product>::RhsNested RhsNested;
|
||||
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
|
||||
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
|
||||
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<Scalar>::size
|
||||
};
|
||||
|
||||
inline Product(const Lhs& lhs, const Rhs& rhs)
|
||||
: m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
ei_assert(lhs.cols() == rhs.rows());
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEval(DestDerived& res) const;
|
||||
|
||||
/** \internal */
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
|
||||
|
||||
private:
|
||||
|
||||
inline int _rows() const { return m_lhs.rows(); }
|
||||
inline int _cols() const { return m_rhs.cols(); }
|
||||
|
||||
const Scalar _coeff(int row, int col) const
|
||||
{
|
||||
Scalar res;
|
||||
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||
if(unroll)
|
||||
{
|
||||
ei_product_unroller<Lhs::ColsAtCompileTime-1,
|
||||
unroll ? Lhs::ColsAtCompileTime : Dynamic,
|
||||
_LhsNested, _RhsNested>
|
||||
::run(row, col, m_lhs, m_rhs, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
|
||||
for(int i = 1; i < m_lhs.cols(); i++)
|
||||
res += m_lhs.coeff(row, i) * m_rhs.coeff(i, col);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
const PacketScalar _packetCoeff(int row, int col) const
|
||||
{
|
||||
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
||||
{
|
||||
PacketScalar res;
|
||||
ei_packet_product_unroller<Flags&RowMajorBit ? true : false, Lhs::ColsAtCompileTime-1,
|
||||
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
|
||||
? Lhs::ColsAtCompileTime : Dynamic,
|
||||
_LhsNested, _RhsNested, PacketScalar>
|
||||
::run(row, col, m_lhs, m_rhs, res);
|
||||
return res;
|
||||
}
|
||||
else
|
||||
return ProductPacketCoeffImpl<Product,Flags&RowMajorBit>::execute(*this, row, col);
|
||||
}
|
||||
|
||||
const PacketScalar _packetCoeffRowMajor(int row, int col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
res = ei_pmul(ei_pset1(m_lhs.coeff(row, 0)),m_rhs.template packetCoeff<Aligned>(0, col));
|
||||
for(int i = 1; i < m_lhs.cols(); i++)
|
||||
res = ei_pmadd(ei_pset1(m_lhs.coeff(row, i)), m_rhs.template packetCoeff<Aligned>(i, col), res);
|
||||
return res;
|
||||
}
|
||||
|
||||
const PacketScalar _packetCoeffColumnMajor(int row, int col) const
|
||||
{
|
||||
PacketScalar res;
|
||||
res = ei_pmul(m_lhs.template packetCoeff<Aligned>(row, 0), ei_pset1(m_rhs.coeff(0, col)));
|
||||
for(int i = 1; i < m_lhs.cols(); i++)
|
||||
res = ei_pmadd(m_lhs.template packetCoeff<Aligned>(row, i), ei_pset1(m_rhs.coeff(i, col)), res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_>
|
||||
friend struct ei_cache_friendly_selector;
|
||||
|
||||
protected:
|
||||
const LhsNested m_lhs;
|
||||
const RhsNested m_rhs;
|
||||
};
|
||||
|
||||
/** \returns the matrix product of \c *this and \a other.
|
||||
*
|
||||
* \sa lazy(), operator*=(const MatrixBase&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const Product<Derived,OtherDerived>
|
||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
return Product<Derived,OtherDerived>(derived(), other.derived());
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline Derived &
|
||||
MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
return *this = *this * other;
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename Derived>
|
||||
template<typename Lhs,typename Rhs>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
|
||||
{
|
||||
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename Lhs, typename Rhs>
|
||||
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
|
||||
{
|
||||
product._cacheFriendlyEval(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived, bool DirectAccess>
|
||||
struct ei_cache_friendly_selector
|
||||
{
|
||||
typedef Product<Lhs,Rhs,EvalMode> Prod;
|
||||
typedef typename Prod::_LhsNested _LhsNested;
|
||||
typedef typename Prod::_RhsNested _RhsNested;
|
||||
typedef typename Prod::Scalar Scalar;
|
||||
static inline void eval(const Prod& product, DestDerived& res)
|
||||
{
|
||||
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
res.setZero();
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
product._rows(), product._cols(), product.m_lhs.cols(),
|
||||
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
|
||||
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
|
||||
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void eval_and_add(const Prod& product, DestDerived& res)
|
||||
{
|
||||
if ( product._rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product._cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& product.m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
product._rows(), product._cols(), product.m_lhs.cols(),
|
||||
_LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(),
|
||||
_RhsNested::Flags&RowMajorBit, &(product.m_rhs.const_cast_derived().coeffRef(0,0)), product.m_rhs.stride(),
|
||||
Prod::Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode, typename DestDerived>
|
||||
struct ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,false>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,EvalMode> Prod;
|
||||
typedef typename Prod::_LhsNested _LhsNested;
|
||||
typedef typename Prod::_RhsNested _RhsNested;
|
||||
typedef typename Prod::Scalar Scalar;
|
||||
static inline void eval(const Prod& product, DestDerived& res)
|
||||
{
|
||||
res = Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
|
||||
static inline void eval_and_add(const Prod& product, DestDerived& res)
|
||||
{
|
||||
res += Product<_LhsNested,_RhsNested,NormalProduct>(product.m_lhs, product.m_rhs).lazy();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
|
||||
{
|
||||
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
|
||||
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
|
||||
::eval(*this, res);
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
|
||||
{
|
||||
ei_cache_friendly_selector<Lhs,Rhs,EvalMode,DestDerived,
|
||||
_LhsNested::Flags&_RhsNested::Flags&DirectAccessBit>
|
||||
::eval_and_add(*this, res);
|
||||
}
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
@@ -107,7 +107,7 @@ template<typename Derived>
|
||||
inline Transpose<Derived>
|
||||
MatrixBase<Derived>::transpose()
|
||||
{
|
||||
return Transpose<Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** This is the const version of transpose(). \sa adjoint() */
|
||||
@@ -115,7 +115,7 @@ template<typename Derived>
|
||||
inline const Transpose<Derived>
|
||||
MatrixBase<Derived>::transpose() const
|
||||
{
|
||||
return Transpose<Derived>(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
|
||||
@@ -130,7 +130,7 @@ inline const Transpose<
|
||||
, NestByValueBit, 0> >
|
||||
MatrixBase<Derived>::adjoint() const
|
||||
{
|
||||
return conjugate().temporary().transpose();
|
||||
return conjugate().nestByValue();
|
||||
}
|
||||
|
||||
#endif // EIGEN_TRANSPOSE_H
|
||||
|
||||
@@ -53,16 +53,16 @@ const unsigned int HereditaryBits = RowMajorBit
|
||||
| EvalBeforeAssigningBit
|
||||
| LargeBit;
|
||||
|
||||
// Possible values for the PartType parameter of part() and the ExtractType parameter of extract()
|
||||
// Possible values for the Mode parameter of part() and of extract()
|
||||
const unsigned int Upper = UpperTriangularBit;
|
||||
const unsigned int StrictlyUpper = UpperTriangularBit | ZeroDiagBit;
|
||||
const unsigned int Lower = LowerTriangularBit;
|
||||
const unsigned int StrictlyLower = LowerTriangularBit | ZeroDiagBit;
|
||||
|
||||
// additional possible values for the PartType parameter of part()
|
||||
// additional possible values for the Mode parameter of part()
|
||||
const unsigned int SelfAdjoint = SelfAdjointBit;
|
||||
|
||||
// additional possible values for the ExtractType parameter of extract()
|
||||
// additional possible values for the Mode parameter of extract()
|
||||
const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit;
|
||||
const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user