* add ei_predux_mul internal function

* apply Ricard Marxer's prod() patch with fixes for the vectorized path
This commit is contained in:
Gael Guennebaud
2009-02-10 18:06:05 +00:00
parent a0cc5fba0a
commit cbbc6d940b
12 changed files with 397 additions and 1 deletions

View File

@@ -139,6 +139,7 @@ namespace Eigen {
#include "src/Core/DiagonalMatrix.h"
#include "src/Core/DiagonalCoeffs.h"
#include "src/Core/Sum.h"
#include "src/Core/Prod.h"
#include "src/Core/Redux.h"
#include "src/Core/Visitor.h"
#include "src/Core/Fuzzy.h"

View File

@@ -115,6 +115,8 @@ EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
/** \internal */
template <typename BinaryOp, typename Scalar>
@@ -257,6 +259,16 @@ template<typename ExpressionType, int Direction> class PartialRedux
* \sa MatrixBase::count() */
const PartialReduxExpr<ExpressionType, ei_member_count<int>, Direction> count() const
{ return _expression(); }
/** \returns a row (or column) vector expression of the product
* of each column (or row) of the referenced expression.
*
* Example: \include PartialRedux_prod.cpp
* Output: \verbinclude PartialRedux_prod.out
*
* \sa MatrixBase::prod() */
const typename ReturnType<ei_member_prod>::Type prod() const
{ return _expression(); }
/** \returns a matrix expression

View File

@@ -96,6 +96,10 @@ ei_preduxp(const Packet* vecs) { return vecs[0]; }
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet& a)
{ return a; }
/** \internal \returns the product of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a)
{ return a; }
/** \internal \returns the reversed elements of \a a*/
template<typename Packet> inline Packet ei_preverse(const Packet& a)
{ return a; }

View File

@@ -535,6 +535,8 @@ template<typename Derived> class MatrixBase
Scalar sum() const;
Scalar trace() const;
Scalar prod() const;
typename ei_traits<Derived>::Scalar minCoeff() const;
typename ei_traits<Derived>::Scalar maxCoeff() const;

262
Eigen/src/Core/Prod.h Normal file
View File

@@ -0,0 +1,262 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_PROD_H
#define EIGEN_PROD_H
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
template<typename Derived>
struct ei_prod_traits
{
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
public:
enum {
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
&& (int(Derived::Flags)&LinearAccessBit)
? LinearVectorization
: NoVectorization
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::MulCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
};
public:
enum {
Unrolling = Cost <= UnrollingLimit
? CompleteUnrolling
: NoUnrolling
};
};
/***************************************************************************
* Part 2 : unrollers
***************************************************************************/
/*** no vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_novec_unroller
{
enum {
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return ei_prod_novec_unroller<Derived, Start, HalfLength>::run(mat)
* ei_prod_novec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat);
}
};
template<typename Derived, int Start>
struct ei_prod_novec_unroller<Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return mat.coeff(row, col);
}
};
/*** vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_vec_unroller
{
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return ei_pmul(
ei_prod_vec_unroller<Derived, Start, HalfLength>::run(mat),
ei_prod_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
}
};
template<typename Derived, int Start>
struct ei_prod_vec_unroller<Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
? index / int(Derived::ColsAtCompileTime)
: index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
? index % int(Derived::ColsAtCompileTime)
: index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return mat.template packet<alignment>(row, col);
}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Derived,
int Vectorization = ei_prod_traits<Derived>::Vectorization,
int Unrolling = ei_prod_traits<Derived>::Unrolling
>
struct ei_prod_impl;
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res *= mat.coeff(i, 0);
for(int j = 1; j < mat.cols(); ++j)
for(int i = 0; i < mat.rows(); ++i)
res *= mat.coeff(i, j);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, CompleteUnrolling>
: public ei_prod_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat)
{
const int size = mat.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedStart = (Derived::Flags & AlignedBit)
|| !(Derived::Flags & DirectAccessBit)
? 0
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const int alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = ei_pmul(packet_res, mat.template packet<alignment>(index));
res = ei_predux_mul(packet_res);
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = Scalar(1);
}
for(int index = 0; index < alignedStart; ++index)
res *= mat.coeff(index);
for(int index = alignedEnd; index < size; ++index)
res *= mat.coeff(index);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived& mat)
{
Scalar res = ei_predux_mul(ei_prod_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
if (VectorizationSize != Size)
res *= ei_prod_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
return res;
}
};
/***************************************************************************
* Part 4 : implementation of MatrixBase methods
***************************************************************************/
/** \returns the product of all coefficients of *this
*
* Example: \include MatrixBase_prod.cpp
* Output: \verbinclude MatrixBase_prod.out
*
* \sa sum()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::prod() const
{
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
return ei_prod_impl<ThisNested>::run(derived());
}
#endif // EIGEN_PROD_H

View File

@@ -246,7 +246,7 @@ struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
/** \returns the sum of all coefficients of *this
*
* \sa trace()
* \sa trace(), prod()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar

View File

@@ -214,6 +214,23 @@ template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs)
return _mm_add_epi32(tmp0, tmp2);
}
// Other reduction functions:
template<> EIGEN_STRONG_INLINE float ei_predux_mul<__m128>(const __m128& a)
{
__m128 tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux_mul<__m128d>(const __m128d& a)
{
return ei_pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_mul<__m128i>(const __m128i& a)
{
__m128i tmp = ei_pmul(a, _mm_unpackhi_epi64(a,a));
return ei_pfirst(tmp) * ei_pfirst(_mm_shuffle_epi32(tmp, 1));
}
#if (defined __GNUC__)
// template <> EIGEN_STRONG_INLINE __m128 ei_pmadd(const __m128& a, const __m128& b, const __m128& c)
// {