mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
- Cleaner use of OpenMP (no code duplication anymore), using a macro and _Pragma.
- Use OpenMP in cacheOptimalProduct and in the vectorized paths as well.
- Kill the vector assignment unroller; implement in operator= the logic for assigning a row-vector to a col-vector.
- CMakeLists support for building tests/examples with -fopenmp and/or -msse2.
- Updates in bench/, especially replacing identity() by ones(), which prevents underflows from perturbing bench results.
This commit is contained in:
@@ -28,7 +28,7 @@
|
||||
#define EIGEN_ASSIGN_H
|
||||
|
||||
template<typename Derived1, typename Derived2, int UnrollCount>
|
||||
struct ei_matrix_operator_equals_unroller
|
||||
struct ei_matrix_assignment_unroller
|
||||
{
|
||||
enum {
|
||||
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
|
||||
@@ -37,13 +37,13 @@ struct ei_matrix_operator_equals_unroller
|
||||
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
ei_matrix_operator_equals_unroller<Derived1, Derived2, UnrollCount-1>::run(dst, src);
|
||||
ei_matrix_assignment_unroller<Derived1, Derived2, UnrollCount-1>::run(dst, src);
|
||||
dst.coeffRef(row, col) = src.coeff(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_matrix_operator_equals_unroller<Derived1, Derived2, 1>
|
||||
struct ei_matrix_assignment_unroller<Derived1, Derived2, 1>
|
||||
{
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@@ -53,13 +53,13 @@ struct ei_matrix_operator_equals_unroller<Derived1, Derived2, 1>
|
||||
|
||||
// prevent buggy user code from causing an infinite recursion
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_matrix_operator_equals_unroller<Derived1, Derived2, 0>
|
||||
struct ei_matrix_assignment_unroller<Derived1, Derived2, 0>
|
||||
{
|
||||
static void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_matrix_operator_equals_unroller<Derived1, Derived2, Dynamic>
|
||||
struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic>
|
||||
{
|
||||
static void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
@@ -67,7 +67,7 @@ struct ei_matrix_operator_equals_unroller<Derived1, Derived2, Dynamic>
|
||||
//----
|
||||
|
||||
template<typename Derived1, typename Derived2, int Index>
|
||||
struct ei_matrix_operator_equals_packet_unroller
|
||||
struct ei_matrix_assignment_packet_unroller
|
||||
{
|
||||
enum {
|
||||
row = Derived1::Flags&RowMajorBit ? Index / Derived1::ColsAtCompileTime : Index % Derived1::RowsAtCompileTime,
|
||||
@@ -76,14 +76,14 @@ struct ei_matrix_operator_equals_packet_unroller
|
||||
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
ei_matrix_operator_equals_packet_unroller<Derived1, Derived2,
|
||||
ei_matrix_assignment_packet_unroller<Derived1, Derived2,
|
||||
Index-ei_packet_traits<typename Derived1::Scalar>::size>::run(dst, src);
|
||||
dst.writePacketCoeff(row, col, src.packetCoeff(row, col));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, 0 >
|
||||
struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, 0 >
|
||||
{
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@@ -92,58 +92,22 @@ struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, 0 >
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_matrix_operator_equals_packet_unroller<Derived1, Derived2, Dynamic>
|
||||
struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
|
||||
{
|
||||
static void run(Derived1 &, const Derived2 &) { ei_internal_assert(false && "ei_matrix_operator_equals_packet_unroller"); }
|
||||
};
|
||||
|
||||
//----
|
||||
|
||||
template<typename Derived1, typename Derived2, int UnrollCount>
|
||||
struct ei_vector_operator_equals_unroller
|
||||
{
|
||||
enum { index = UnrollCount - 1 };
|
||||
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
ei_vector_operator_equals_unroller<Derived1, Derived2, UnrollCount-1>::run(dst, src);
|
||||
dst.coeffRef(index) = src.coeff(index);
|
||||
}
|
||||
};
|
||||
|
||||
// prevent buggy user code from causing an infinite recursion
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_vector_operator_equals_unroller<Derived1, Derived2, 0>
|
||||
{
|
||||
static void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_vector_operator_equals_unroller<Derived1, Derived2, 1>
|
||||
{
|
||||
static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
dst.coeffRef(0) = src.coeff(0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_vector_operator_equals_unroller<Derived1, Derived2, Dynamic>
|
||||
{
|
||||
static void run(Derived1 &, const Derived2 &) {}
|
||||
static void run(Derived1 &, const Derived2 &) { ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); }
|
||||
};
|
||||
|
||||
template <typename Derived, typename OtherDerived,
|
||||
bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit)
|
||||
&& ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))>
|
||||
struct ei_operator_equals_impl;
|
||||
struct ei_assignment_impl;
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
Derived& MatrixBase<Derived>
|
||||
::lazyAssign(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
ei_operator_equals_impl<Derived,OtherDerived>::execute(derived(),other.derived());
|
||||
ei_assignment_impl<Derived,OtherDerived>::execute(derived(),other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@@ -152,125 +116,27 @@ template<typename OtherDerived>
|
||||
Derived& MatrixBase<Derived>
|
||||
::operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
const bool need_to_transpose = Derived::IsVectorAtCompileTime
|
||||
&& OtherDerived::IsVectorAtCompileTime
|
||||
&& (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime;
|
||||
if(OtherDerived::Flags & EvalBeforeAssigningBit)
|
||||
{
|
||||
return lazyAssign(other.derived().eval());
|
||||
if(need_to_transpose)
|
||||
return lazyAssign(other.transpose().eval());
|
||||
else
|
||||
return lazyAssign(other.eval());
|
||||
}
|
||||
else
|
||||
return lazyAssign(other.derived());
|
||||
{
|
||||
if(need_to_transpose)
|
||||
return lazyAssign(other.transpose());
|
||||
else
|
||||
return lazyAssign(other.derived());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_operator_equals_impl<Derived, OtherDerived, false>
|
||||
{
|
||||
static void execute(Derived & dst, const OtherDerived & src)
|
||||
{
|
||||
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||
if(Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
|
||||
// copying a vector expression into a vector
|
||||
{
|
||||
ei_assert(dst.size() == src.size());
|
||||
if(unroll)
|
||||
ei_vector_operator_equals_unroller
|
||||
<Derived, OtherDerived,
|
||||
unroll ? Derived::SizeAtCompileTime : Dynamic
|
||||
>::run(dst.derived(), src.derived());
|
||||
else
|
||||
{
|
||||
#ifdef EIGEN_USE_OPENMPf
|
||||
if(Derived::Flags & OtherDerived::Flags & LargeBit)
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma omp parallel default(none) shared(other)
|
||||
#else
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
#pragma omp for
|
||||
for(int i = 0; i < dst.size(); i++)
|
||||
dst.coeffRef(i) = src.coeff(i);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif // EIGEN_USE_OPENMP
|
||||
{
|
||||
for(int i = 0; i < dst.size(); i++)
|
||||
dst.coeffRef(i) = src.coeff(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
else // copying a matrix expression into a matrix
|
||||
{
|
||||
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
if(unroll)
|
||||
{
|
||||
ei_matrix_operator_equals_unroller
|
||||
<Derived, OtherDerived,
|
||||
unroll ? Derived::SizeAtCompileTime : Dynamic
|
||||
>::run(dst.derived(), src.derived());
|
||||
}
|
||||
else
|
||||
{
|
||||
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
|
||||
{
|
||||
#ifdef EIGEN_USE_OPENMP
|
||||
if(Derived::Flags & OtherDerived::Flags & LargeBit)
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma omp parallel default(none) shared(other)
|
||||
#else
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
#pragma omp for
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif // EIGEN_USE_OPENMP
|
||||
{
|
||||
// traverse in column-major order
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef EIGEN_USE_OPENMP
|
||||
if(Derived::Flags & OtherDerived::Flags & LargeBit)
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma omp parallel default(none) shared(other)
|
||||
#else
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
#pragma omp for
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif // EIGEN_USE_OPENMP
|
||||
{
|
||||
// traverse in row-major order
|
||||
// in order to allow the compiler to unroll the inner loop
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_operator_equals_impl<Derived, OtherDerived, true>
|
||||
struct ei_assignment_impl<Derived, OtherDerived, false>
|
||||
{
|
||||
static void execute(Derived & dst, const OtherDerived & src)
|
||||
{
|
||||
@@ -278,7 +144,47 @@ struct ei_operator_equals_impl<Derived, OtherDerived, true>
|
||||
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
if(unroll)
|
||||
{
|
||||
ei_matrix_operator_equals_packet_unroller
|
||||
ei_matrix_assignment_unroller
|
||||
<Derived, OtherDerived,
|
||||
unroll ? Derived::SizeAtCompileTime : Dynamic
|
||||
>::run(dst.derived(), src.derived());
|
||||
}
|
||||
else
|
||||
{
|
||||
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
|
||||
{
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int j = 0; j < dst.cols(); j++) \
|
||||
for(int i = 0; i < dst.rows(); i++) \
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
else
|
||||
{
|
||||
// traverse in row-major order
|
||||
// in order to allow the compiler to unroll the inner loop
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int i = 0; i < dst.rows(); i++) \
|
||||
for(int j = 0; j < dst.cols(); j++) \
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_assignment_impl<Derived, OtherDerived, true>
|
||||
{
|
||||
static void execute(Derived & dst, const OtherDerived & src)
|
||||
{
|
||||
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
if(unroll)
|
||||
{
|
||||
ei_matrix_assignment_packet_unroller
|
||||
<Derived, OtherDerived,
|
||||
unroll && int(Derived::SizeAtCompileTime)>=ei_packet_traits<typename Derived::Scalar>::size
|
||||
? Derived::SizeAtCompileTime-ei_packet_traits<typename Derived::Scalar>::size
|
||||
@@ -288,15 +194,21 @@ struct ei_operator_equals_impl<Derived, OtherDerived, true>
|
||||
{
|
||||
if(OtherDerived::Flags&RowMajorBit)
|
||||
{
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int i = 0; i < dst.rows(); i++) \
|
||||
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) \
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int j = 0; j < dst.cols(); j++) \
|
||||
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) \
|
||||
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(Derived::Flags & OtherDerived::Flags & LargeBit)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -283,64 +283,70 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
|
||||
{
|
||||
for(int k=0; k<m_rhs.cols(); k++)
|
||||
{
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
|
||||
for (int i=0; i<m_lhs.rows(); i+=ei_packet_traits<Scalar>::size)
|
||||
{
|
||||
res.writePacketCoeff(i,k,
|
||||
ei_padd(
|
||||
res.packetCoeff(i,k),
|
||||
ei_padd(
|
||||
ei_padd(
|
||||
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
|
||||
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
|
||||
ei_padd(
|
||||
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
|
||||
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int k=0; k<this->cols(); k++) \
|
||||
{ \
|
||||
int j=0; \
|
||||
for(; j<cols4; j+=4) \
|
||||
{ \
|
||||
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k)); \
|
||||
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k)); \
|
||||
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k)); \
|
||||
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k)); \
|
||||
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size) \
|
||||
{ \
|
||||
res.writePacketCoeff(i,k,\
|
||||
ei_padd( \
|
||||
res.packetCoeff(i,k), \
|
||||
ei_padd( \
|
||||
ei_padd( \
|
||||
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)), \
|
||||
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), \
|
||||
ei_padd( \
|
||||
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)), \
|
||||
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)) \
|
||||
) \
|
||||
) \
|
||||
) \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
for(; j<m_lhs.cols(); ++j) \
|
||||
{ \
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k)); \
|
||||
for (int i=0; i<this->rows(); ++i) \
|
||||
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); \
|
||||
} \
|
||||
}
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
{
|
||||
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
|
||||
for (int i=0; i<m_lhs.rows(); ++i)
|
||||
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
|
||||
}
|
||||
}
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(Flags & DestDerived::Flags & LargeBit)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif // EIGEN_VECTORIZE
|
||||
{
|
||||
for(int k=0; k<m_rhs.cols(); ++k)
|
||||
{
|
||||
int j=0;
|
||||
for(; j<cols4; j+=4)
|
||||
{
|
||||
const Scalar tmp0 = m_rhs.coeff(j ,k);
|
||||
const Scalar tmp1 = m_rhs.coeff(j+1,k);
|
||||
const Scalar tmp2 = m_rhs.coeff(j+2,k);
|
||||
const Scalar tmp3 = m_rhs.coeff(j+3,k);
|
||||
for (int i=0; i<m_lhs.rows(); ++i)
|
||||
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1)
|
||||
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
|
||||
#define EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
for(int k=0; k<this->cols(); ++k) \
|
||||
{ \
|
||||
int j=0; \
|
||||
for(; j<cols4; j+=4) \
|
||||
{ \
|
||||
const Scalar tmp0 = m_rhs.coeff(j ,k); \
|
||||
const Scalar tmp1 = m_rhs.coeff(j+1,k); \
|
||||
const Scalar tmp2 = m_rhs.coeff(j+2,k); \
|
||||
const Scalar tmp3 = m_rhs.coeff(j+3,k); \
|
||||
for (int i=0; i<this->rows(); ++i) \
|
||||
res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) \
|
||||
+ tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); \
|
||||
} \
|
||||
for(; j<m_lhs.cols(); ++j) \
|
||||
{ \
|
||||
const Scalar tmp = m_rhs.coeff(j,k); \
|
||||
for (int i=0; i<this->rows(); ++i) \
|
||||
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); \
|
||||
} \
|
||||
}
|
||||
for(; j<m_lhs.cols(); ++j)
|
||||
{
|
||||
const Scalar tmp = m_rhs.coeff(j,k);
|
||||
for (int i=0; i<m_lhs.rows(); ++i)
|
||||
res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
|
||||
}
|
||||
}
|
||||
EIGEN_RUN_PARALLELIZABLE_LOOP(Flags & DestDerived::Flags & LargeBit)
|
||||
#undef EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -74,6 +74,30 @@ using Eigen::MatrixBase;
|
||||
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
|
||||
#endif
|
||||
|
||||
// EIGEN_RUN_PARALLELIZABLE_LOOP(condition) expands the loop held in the
// helper macro EIGEN_THE_PARALLELIZABLE_LOOP, which the call site must
// #define beforehand (and is expected to #undef afterwards).
//
// - With EIGEN_USE_OPENMP defined: if `condition` is true the loop is emitted
//   inside an "omp parallel" region and prefixed with "omp for"; otherwise
//   the very same loop text is emitted as-is. The loop body is therefore
//   expanded twice, once per branch.
// - Without EIGEN_USE_OPENMP: `condition` is ignored and the macro expands to
//   the bare loop.
//
// _Pragma("...") is used instead of #pragma because a #pragma directive
// cannot appear inside a macro definition.
//
// NOTE(review): the Intel-compiler variant adds shared(other) to the parallel
// directive, while the loops passed to this macro in this change reference
// dst/src/res/m_lhs/m_rhs. Confirm that a variable named `other` is in scope
// at every expansion site, and that default(none) is satisfied for the
// variables the loops actually use.
#ifdef EIGEN_USE_OPENMP
|
||||
# ifdef __INTEL_COMPILER
|
||||
# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none) shared(other)")
|
||||
# else
|
||||
# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none)")
|
||||
# endif
|
||||
# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) \
|
||||
if(condition) \
|
||||
{ \
|
||||
EIGEN_PRAGMA_OMP_PARALLEL \
|
||||
{ \
|
||||
_Pragma("omp for") \
|
||||
EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
EIGEN_THE_PARALLELIZABLE_LOOP \
|
||||
}
|
||||
#else // EIGEN_USE_OPENMP
|
||||
# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) EIGEN_THE_PARALLELIZABLE_LOOP
|
||||
#endif
|
||||
|
||||
|
||||
// FIXME with the always_inline attribute,
|
||||
// gcc 3.4.x reports the following compilation error:
|
||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||
|
||||
Reference in New Issue
Block a user