diff --git a/Eigen/Core b/Eigen/Core index 6a315b09f..24dc37145 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -10,13 +10,6 @@ #endif #endif -#ifndef EIGEN_DONT_PARALLELIZE -#ifdef _OPENMP -#define EIGEN_USE_OPENMP -#include -#endif -#endif - #include #include #include diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 1b6e928d2..d0f126689 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -135,11 +135,6 @@ Derived& MatrixBase } } -template bool ei_should_parallelize_assignment(const T1& t, const T2&) -{ - return (T1::Flags & T2::Flags & LargeBit) && t.size() >= EIGEN_PARALLELIZATION_TRESHOLD; -} - template struct ei_assignment_impl { @@ -158,23 +153,17 @@ struct ei_assignment_impl { if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) { - #define EIGEN_THE_PARALLELIZABLE_LOOP \ - for(int j = 0; j < dst.cols(); j++) \ - for(int i = 0; i < dst.rows(); i++) \ - dst.coeffRef(i, j) = src.coeff(i, j); - EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src)) - #undef EIGEN_THE_PARALLELIZABLE_LOOP + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); } else { // traverse in row-major order // in order to allow the compiler to unroll the inner loop - #define EIGEN_THE_PARALLELIZABLE_LOOP \ - for(int i = 0; i < dst.rows(); i++) \ - for(int j = 0; j < dst.cols(); j++) \ - dst.coeffRef(i, j) = src.coeff(i, j); - EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src)) - #undef EIGEN_THE_PARALLELIZABLE_LOOP + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j++) + dst.coeffRef(i, j) = src.coeff(i, j); } } } @@ -199,21 +188,15 @@ struct ei_assignment_impl { if(OtherDerived::Flags&RowMajorBit) { - #define EIGEN_THE_PARALLELIZABLE_LOOP \ - for(int i = 0; i < dst.rows(); i++) \ - for(int j = 0; j < dst.cols(); j+=ei_packet_traits::size) \ + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j+=ei_packet_traits::size) dst.writePacketCoeff(i, j, src.packetCoeff(i, j)); - EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src)) - #undef EIGEN_THE_PARALLELIZABLE_LOOP } else { - #define EIGEN_THE_PARALLELIZABLE_LOOP \ - for(int j = 0; j < dst.cols(); j++) \ - for(int i = 0; i < dst.rows(); i+=ei_packet_traits::size) \ + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i+=ei_packet_traits::size) dst.writePacketCoeff(i, j, src.packetCoeff(i, j)); - EIGEN_RUN_PARALLELIZABLE_LOOP(ei_should_parallelize_assignment(dst, src)) - #undef EIGEN_THE_PARALLELIZABLE_LOOP } } } diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index b593825f8..a49609f5c 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -280,75 +280,67 @@ void Product::_cacheOptimalEval(DestDerived& res) const { res.setZero(); const int cols4 = m_lhs.cols() & 0xfffffffC; - const bool should_parallelize = (Flags & DestDerived::Flags & LargeBit) - && res.size() >= EIGEN_PARALLELIZATION_TRESHOLD; #ifdef EIGEN_VECTORIZE if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) ) - { - #define EIGEN_THE_PARALLELIZABLE_LOOP \ - for(int k=0; kcols(); k++) \ - { \ - int j=0; \ - for(; j::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k)); \ - const typename ei_packet_traits::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k)); \ - const typename ei_packet_traits::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k)); \ - const typename ei_packet_traits::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k)); \ - for (int i=0; irows(); i+=ei_packet_traits::size) \ - { \ - res.writePacketCoeff(i,k,\ - ei_padd( \ - res.packetCoeff(i,k), \ - ei_padd( \ - ei_padd( \ - ei_pmul(tmp0, m_lhs.packetCoeff(i,j)), \ - ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), \ - ei_padd( \ - ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)), \ - ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)) \ - ) \ - ) \ - ) \ - ); \ - } \ - } \ - for(; j::type tmp = ei_pset1(m_rhs.coeff(j,k)); \ - for (int i=0; irows(); ++i) \ - res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); \ - } \ + { + for(int k=0; kcols(); k++) + { + int j=0; + for(; j::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k)); + const typename ei_packet_traits::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k)); + const typename ei_packet_traits::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k)); + const typename ei_packet_traits::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k)); + for (int i=0; irows(); i+=ei_packet_traits::size) + { + res.writePacketCoeff(i,k,\ + ei_padd( + res.packetCoeff(i,k), + ei_padd( + ei_padd( + ei_pmul(tmp0, m_lhs.packetCoeff(i,j)), + ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))), + ei_padd( + ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)), + ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3)) + ) + ) + ) + ); + } } - EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize) - #undef EIGEN_THE_PARALLELIZABLE_LOOP + for(; j::type tmp = ei_pset1(m_rhs.coeff(j,k)); + for (int i=0; irows(); ++i) + res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j))); + } + } } else #endif // EIGEN_VECTORIZE { - #define EIGEN_THE_PARALLELIZABLE_LOOP \ - for(int k=0; kcols(); ++k) \ - { \ - int j=0; \ - for(; jrows(); ++i) \ - res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) \ - + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); \ - } \ - for(; jrows(); ++i) \ - res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); \ - } \ + for(int k=0; kcols(); ++k) + { + int j=0; + for(; jrows(); ++i) + res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) + + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3); } - EIGEN_RUN_PARALLELIZABLE_LOOP(should_parallelize) - #undef EIGEN_THE_PARALLELIZABLE_LOOP + for(; jrows(); ++i) + res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j); + } + } } } diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index fad046766..be5e7bba5 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -37,10 +37,6 @@ #define EIGEN_UNROLLING_LIMIT 400 #endif -#ifndef EIGEN_PARALLELIZATION_TRESHOLD -#define EIGEN_PARALLELIZATION_TRESHOLD 2000 -#endif - #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit #else @@ -78,30 +74,6 @@ using Eigen::MatrixBase; #define EIGEN_ONLY_USED_FOR_DEBUG(x) #endif -#ifdef EIGEN_USE_OPENMP -# ifdef __INTEL_COMPILER -# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none) shared(other)") -# else -# define EIGEN_PRAGMA_OMP_PARALLEL _Pragma("omp parallel default(none)") -# endif -# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) \ - if(condition) \ - { \ - EIGEN_PRAGMA_OMP_PARALLEL \ - { \ - _Pragma("omp for") \ - EIGEN_THE_PARALLELIZABLE_LOOP \ - } \ - } \ - else \ - { \ - EIGEN_THE_PARALLELIZABLE_LOOP \ - } -#else // EIGEN_USE_OPENMP -# define EIGEN_RUN_PARALLELIZABLE_LOOP(condition) EIGEN_THE_PARALLELIZABLE_LOOP -#endif - - // FIXME with the always_inline attribute, // gcc 3.4.x reports the following compilation error: // Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval Eigen::MatrixBase::eval() const' diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h index 1d4bd9bf0..eda20e1f3 100644 --- a/Eigen/src/LU/Inverse.h +++ b/Eigen/src/LU/Inverse.h @@ -92,7 +92,6 @@ template class Inverse : ei_no_assignm enum { _Size = MatrixType::RowsAtCompileTime }; void _compute(const MatrixType& matrix); void _compute_in_general_case(const MatrixType& matrix); - void _compute_in_size1_case(const MatrixType& matrix); void _compute_in_size2_case(const MatrixType& matrix); void _compute_in_size3_case(const MatrixType& matrix); void _compute_in_size4_case(const MatrixType& matrix); diff --git a/bench/benchmarkXcwise.cpp b/bench/benchmarkXcwise.cpp index b2a7fc24c..9b394ff35 100644 --- a/bench/benchmarkXcwise.cpp +++ b/bench/benchmarkXcwise.cpp @@ -5,12 +5,12 @@ using namespace std; USING_PART_OF_NAMESPACE_EIGEN -#ifndef MATTYPE -#define MATTYPE MatrixXLd +#ifndef VECTYPE +#define VECTYPE VectorXLd #endif -#ifndef MATSIZE -#define MATSIZE 1000000 +#ifndef VECSIZE +#define VECSIZE 1000000 #endif #ifndef REPEAT @@ -19,16 +19,16 @@ USING_PART_OF_NAMESPACE_EIGEN int main(int argc, char *argv[]) { - MATTYPE I = MATTYPE::ones(MATSIZE,1); - MATTYPE m(MATSIZE,1); - for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < 1; j++) + VECTYPE I = VECTYPE::ones(VECSIZE); + VECTYPE m(VECSIZE,1); + for(int i = 0; i < VECSIZE; i++) { - m(i,j) = 0.1 * (i+j+1)/MATSIZE/MATSIZE; + m[i] = 0.1 * i/VECSIZE; } for(int a = 0; a < REPEAT; a++) { - m = MATTYPE::ones(MATSIZE,1) + 0.00005 * (m.cwiseProduct(m) + m/4); + m = VECTYPE::ones(VECSIZE) + 0.00005 * (m.cwiseProduct(m) + m/4); } - cout << m(0,0) << endl; + cout << m[0] << endl; return 0; }