bump to 3.1.0-rc1

bug #466 : better fix for the race condition: this new patch add an initParallel()
function which must be called at the initialization time of any multi-threaded application calling Eigen from multiple threads.
2026-04-10 11:34:33 +08:00 · 2012-06-14 21:25:50 +02:00 · 2012-06-14 14:24:15 +02:00 · 2012-06-14 10:38:39 +02:00 · 2012-06-14 09:49:57 +02:00 · 2012-06-12 16:47:14 +02:00
25 changed files with 406 additions and 316 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -338,12 +338,9 @@ if(EIGEN_BUILD_BTL)
  add_subdirectory(bench/btl EXCLUDE_FROM_ALL)
 endif(EIGEN_BUILD_BTL)

-if(TEST_REAL_CASES)
-  if(NOT WIN32)
-    add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
-    set(ENV(EIGEN_MATRIX_DIR) ${TEST_REAL_CASES})
-  endif(NOT WIN32)
-endif(TEST_REAL_CASES)
+if(NOT WIN32)
+  add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
+endif(NOT WIN32)

 ei_testing_print_summary()

--- a/Eigen/Core
+++ b/Eigen/Core
@@ -329,12 +329,12 @@ using std::ptrdiff_t;
 #include "src/Core/GeneralProduct.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
-#include "src/Core/SolveTriangular.h"
+#include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/Parallelizer.h"
 #include "src/Core/products/CoeffBasedProduct.h"
-#include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/GeneralMatrixVector.h"
 #include "src/Core/products/GeneralMatrixMatrix.h"
+#include "src/Core/SolveTriangular.h"
 #include "src/Core/products/GeneralMatrixMatrixTriangular.h"
 #include "src/Core/products/SelfadjointMatrixVector.h"
 #include "src/Core/products/SelfadjointMatrixMatrix.h"
--- a/Eigen/src/Core/Assign_MKL.h
+++ b/Eigen/src/Core/Assign_MKL.h
@@ -209,10 +209,13 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)

 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)

+// The vm*powx functions are not avaibale in the windows version of MKL.
+#ifdef _WIN32
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
+#endif

 } // end namespace internal

--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -295,7 +295,7 @@ struct functor_traits<scalar_opposite_op<Scalar> >
 template<typename Scalar> struct scalar_abs_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return abs(a); }
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs(a); }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pabs(a); }
@@ -317,7 +317,7 @@ struct functor_traits<scalar_abs_op<Scalar> >
 template<typename Scalar> struct scalar_abs2_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return abs2(a); }
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs2(a); }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pmul(a,a); }
@@ -333,7 +333,7 @@ struct functor_traits<scalar_abs2_op<Scalar> >
  */
 template<typename Scalar> struct scalar_conjugate_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return conj(a); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return internal::conj(a); }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
 };
@@ -370,7 +370,7 @@ template<typename Scalar>
 struct scalar_real_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return real(a); }
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::real(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_real_op<Scalar> >
@@ -385,7 +385,7 @@ template<typename Scalar>
 struct scalar_imag_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return imag(a); }
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::imag(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_imag_op<Scalar> >
@@ -400,7 +400,7 @@ template<typename Scalar>
 struct scalar_real_ref_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return real_ref(*const_cast<Scalar*>(&a)); }
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::real_ref(*const_cast<Scalar*>(&a)); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_real_ref_op<Scalar> >
@@ -415,7 +415,7 @@ template<typename Scalar>
 struct scalar_imag_ref_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return imag_ref(*const_cast<Scalar*>(&a)); }
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::imag_ref(*const_cast<Scalar*>(&a)); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_imag_ref_op<Scalar> >
@@ -429,7 +429,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
  */
 template<typename Scalar> struct scalar_exp_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
-  inline const Scalar operator() (const Scalar& a) const { return exp(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::exp(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
 };
@@ -445,7 +445,7 @@ struct functor_traits<scalar_exp_op<Scalar> >
  */
 template<typename Scalar> struct scalar_log_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
-  inline const Scalar operator() (const Scalar& a) const { return log(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::log(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
 };
@@ -703,7 +703,7 @@ struct functor_traits<scalar_add_op<Scalar> >
  */
 template<typename Scalar> struct scalar_sqrt_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
-  inline const Scalar operator() (const Scalar& a) const { return sqrt(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::sqrt(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
 };
@@ -721,7 +721,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
  */
 template<typename Scalar> struct scalar_cos_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
-  inline Scalar operator() (const Scalar& a) const { return cos(a); }
+  inline Scalar operator() (const Scalar& a) const { return internal::cos(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
 };
@@ -740,7 +740,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
  */
 template<typename Scalar> struct scalar_sin_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
-  inline const Scalar operator() (const Scalar& a) const { return sin(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::sin(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
 };
@@ -760,7 +760,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
  */
 template<typename Scalar> struct scalar_tan_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
-  inline const Scalar operator() (const Scalar& a) const { return tan(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::tan(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
 };
@@ -779,7 +779,7 @@ struct functor_traits<scalar_tan_op<Scalar> >
  */
 template<typename Scalar> struct scalar_acos_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
-  inline const Scalar operator() (const Scalar& a) const { return acos(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::acos(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
 };
@@ -798,7 +798,7 @@ struct functor_traits<scalar_acos_op<Scalar> >
  */
 template<typename Scalar> struct scalar_asin_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
-  inline const Scalar operator() (const Scalar& a) const { return asin(a); }
+  inline const Scalar operator() (const Scalar& a) const { return internal::asin(a); }
  typedef typename packet_traits<Scalar>::type Packet;
  inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
 };
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -100,12 +100,22 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
  typedef typename Rhs::Index Index;
  typedef blas_traits<Lhs> LhsProductTraits;
  typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
+
  static void run(const Lhs& lhs, Rhs& rhs)
  {
    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
+
+    const Index size = lhs.rows();
+    const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();
+
+    typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+              Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
+
+    BlockingType blocking(rhs.rows(), rhs.cols(), size);
+
    triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
                               (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
-      ::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
+      ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking);
  }
 };

--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -123,7 +123,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);


-  _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
  _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);

  _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
@@ -170,7 +170,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
  y = pmadd(y, z, x);
  y = padd(y, p4f_1);

-  /* build 2^n */
+  // build 2^n
  emm0 = _mm_cvttps_epi32(fx);
  emm0 = _mm_add_epi32(emm0, p4i_0x7f);
  emm0 = _mm_slli_epi32(emm0, 23);
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -26,7 +26,7 @@
 #define EIGEN_GENERAL_BLOCK_PANEL_H

 namespace Eigen { 
-
+  
 namespace internal {

 template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
@@ -42,9 +42,14 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
 /** \internal */
 inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
 {
-  static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
-  static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
-
+  static std::ptrdiff_t m_l1CacheSize = 0;
+  static std::ptrdiff_t m_l2CacheSize = 0;
+  if(m_l2CacheSize==0)
+  {
+    m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
+    m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
+  }
+  
  if(action==SetAction)
  {
    // set the cpu cache size and cache all block sizes from a global cache size in byte
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -79,7 +79,7 @@ static void run(Index rows, Index cols, Index depth,

  typedef gebp_traits<LhsScalar,RhsScalar> Traits;

-  Index kc = blocking.kc();                 // cache block size along the K direction
+  Index kc = blocking.kc();                   // cache block size along the K direction
  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
  //Index nc = blocking.nc(); // cache block size along the N direction

@@ -249,7 +249,7 @@ struct gemm_functor
    BlockingType& m_blocking;
 };

-template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
+template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
 bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;

 template<typename _LhsScalar, typename _RhsScalar>
@@ -282,8 +282,8 @@ class level3_blocking
    inline RhsScalar* blockW() { return m_blockW; }
 };

-template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
  : public level3_blocking<
      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -324,8 +324,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
    inline void allocateAll() {}
 };

-template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
  : public level3_blocking<
      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -349,7 +349,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
      this->m_nc = Transpose ? rows : cols;
      this->m_kc = depth;

-      computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc);
+      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc);
      m_sizeA = this->m_mc * this->m_kc;
      m_sizeB = this->m_kc * this->m_nc;
      m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -57,12 +57,23 @@ inline void manage_multi_threading(Action action, int* v)
  }
 }

+}
+
+/** Must be call first when calling Eigen from multiple threads */
+inline void initParallel()
+{
+  int nbt;
+  internal::manage_multi_threading(GetAction, &nbt);
+  std::ptrdiff_t l1, l2;
+  internal::manage_caching_sizes(GetAction, &l1, &l2);
+}
+
 /** \returns the max number of threads reserved for Eigen
  * \sa setNbThreads */
 inline int nbThreads()
 {
  int ret;
-  manage_multi_threading(GetAction, &ret);
+  internal::manage_multi_threading(GetAction, &ret);
  return ret;
 }

@@ -70,9 +81,11 @@ inline int nbThreads()
  * \sa nbThreads */
 inline void setNbThreads(int v)
 {
-  manage_multi_threading(SetAction, &v);
+  internal::manage_multi_threading(SetAction, &v);
 }

+namespace internal {
+
 template<typename Index> struct GemmParallelInfo
 {
  GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
@@ -121,6 +134,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
  if(threads==1)
    return func(0,rows, 0,cols);

+  Eigen::initParallel();
  func.initParallelSession();

  if(transpose)
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -36,14 +36,15 @@ struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,
  static EIGEN_DONT_INLINE void run(
    Index size, Index cols,
    const Scalar*  tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
  {
    triangular_solve_matrix<
      Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
      (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
      NumTraits<Scalar>::IsComplex && Conjugate,
      TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
-      ::run(size, cols, tri, triStride, _other, otherStride);
+      ::run(size, cols, tri, triStride, _other, otherStride, blocking);
  }
 };

@@ -55,7 +56,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
  static EIGEN_DONT_INLINE void run(
    Index size, Index otherSize,
    const Scalar* _tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
  {
    Index cols = otherSize;
    const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
@@ -67,17 +69,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
      IsLower = (Mode&Lower) == Lower
    };

-    Index kc = size; // cache block size along the K direction
-    Index mc = size;  // cache block size along the M direction
-    Index nc = cols;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(size,blocking.mc());  // cache block size along the M direction

+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB + sizeW;
-    Scalar* blockW = allocatedBlockB;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());

    conj_if<Conjugate> conj;
    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
@@ -181,7 +182,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
          {
            pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);

-            gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1));
+            gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW);
          }
        }
      }
@@ -197,7 +198,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
  static EIGEN_DONT_INLINE void run(
    Index size, Index otherSize,
    const Scalar* _tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
  {
    Index rows = otherSize;
    const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
@@ -210,19 +212,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
      IsLower = (Mode&Lower) == Lower
    };

-//     Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction
-//     Index mc = std::min<Index>(Traits::Max_mc,size);   // cache block size along the M direction
-    // check that !!!!
-    Index kc = size; // cache block size along the K direction
-    Index mc = size;  // cache block size along the M direction
-    Index nc = rows;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction

+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*size;
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*size;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB + sizeW;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());

    conj_if<Conjugate> conj;
    gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
@@ -289,7 +288,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
                          Scalar(-1),
                          actual_kc, actual_kc, // strides
                          panelOffset, panelOffset, // offsets
-                          allocatedBlockB);  // workspace
+                          blockW);  // workspace
            }

            // unblocked triangular solve
@@ -320,7 +319,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
        if (rs>0)
          gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
                      actual_mc, actual_kc, rs, Scalar(-1),
-                      -1, -1, 0, 0, allocatedBlockB);
+                      -1, -1, 0, 0, blockW);
      }
    }
  }
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -28,7 +28,7 @@

 #define EIGEN_WORLD_VERSION 3
 #define EIGEN_MAJOR_VERSION 0
-#define EIGEN_MINOR_VERSION 93
+#define EIGEN_MINOR_VERSION 94

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -468,15 +468,40 @@ public:
  {
    return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
  }
-
+  
+  #ifdef __INTEL_COMPILER
+private:
+  // this intermediate structure permits to workaround a bug in ICC 11:
+  //   error: template instantiation resulted in unexpected function type of "Eigen::Transform<double, 3, 32, 0>
+  //             (const Eigen::Transform<double, 3, 2, 0> &) const"
+  //  (the meaning of a name may have changed since the template declaration -- the type of the template is:
+  // "Eigen::internal::transform_transform_product_impl<Eigen::Transform<double, 3, 32, 0>,
+  //     Eigen::Transform<double, 3, Mode, Options>, <expression>>::ResultType (const Eigen::Transform<double, 3, Mode, Options> &) const")
+  // 
+  template<int OtherMode,int OtherOptions> struct icc_11_workaround
+  {
+    typedef internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> > ProductType;
+    typedef typename ProductType::ResultType ResultType;
+  };
+  
+public:
  /** Concatenates two different transformations */
  template<int OtherMode,int OtherOptions>
-  inline const typename internal::transform_transform_product_impl<
-    Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
+  inline typename icc_11_workaround<OtherMode,OtherOptions>::ResultType
+    operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
+  {
+    typedef typename icc_11_workaround<OtherMode,OtherOptions>::ProductType ProductType;
+    return ProductType::run(*this,other);
+  }
+  #else
+  /** Concatenates two different transformations */
+  template<int OtherMode,int OtherOptions>
+  inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
    operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
  {
    return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
  }
+  #endif

  /** \sa MatrixBase::setIdentity() */
  void setIdentity() { m_matrix.setIdentity(); }
--- a/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h
@@ -35,7 +35,6 @@ namespace Eigen {
  *
  * \sa TutorialSparseDirectSolvers
  */
-
 template<typename _MatrixType, bool IsStrSym = false> class PastixLU;
 template<typename _MatrixType, int Options> class PastixLLT;
 template<typename _MatrixType, int Options> class PastixLDLT;
@@ -75,32 +74,34 @@ namespace internal
  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
  {
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
-    if (nbrhs == 0) x = NULL;
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
    s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); 
  }
  
  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
  {
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
-    if (nbrhs == 0) x = NULL;
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
    d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); 
  }
  
  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
  {
+    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
    c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<COMPLEX*>(vals), perm, invp, reinterpret_cast<COMPLEX*>(x), nbrhs, iparm, dparm); 
  }
  
  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
  {
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
-    if (nbrhs == 0) x = NULL;
+    if (nbrhs == 0) {x = NULL; nbrhs=1;}
    z_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<DCOMPLEX*>(vals), perm, invp, reinterpret_cast<DCOMPLEX*>(x), nbrhs, iparm, dparm); 
  }

  // Convert the matrix  to Fortran-style Numbering
  template <typename MatrixType>
-  void EigenToFortranNumbering (MatrixType& mat)
+  void c_to_fortran_numbering (MatrixType& mat)
  {
    if ( !(mat.outerIndexPtr()[0]) ) 
    { 
@@ -114,7 +115,7 @@ namespace internal
  
  // Convert to C-style Numbering
  template <typename MatrixType>
-  void EigenToCNumbering (MatrixType& mat)
+  void fortran_to_c_numbering (MatrixType& mat)
  {
    // Check the Numbering
    if ( mat.outerIndexPtr()[0] == 1 ) 
@@ -126,38 +127,12 @@ namespace internal
        --mat.innerIndexPtr()[i];
    }
  }
-  
-  // Symmetrize the graph of the input matrix 
-  // In : The Input matrix to symmetrize the pattern
-  // Out : The output matrix
-  // StrMatTrans : The structural pattern of the transpose of In; It is 
-  // used to optimize the future symmetrization with the same matrix pattern
-  // WARNING It is assumed here that successive calls to this routine are done 
-  // with matrices having the same pattern.
-  template <typename MatrixType>
-  void EigenSymmetrizeMatrixGraph (const MatrixType& In, MatrixType& Out, MatrixType& StrMatTrans, bool& hasTranspose)
-  {
-    eigen_assert(In.cols()==In.rows() && " Can only symmetrize the graph of a square matrix");
-    if (!hasTranspose)
-    { //First call to this routine, need to compute the structural pattern of In^T
-      StrMatTrans = In.transpose();
-      // Set the elements of the matrix to zero 
-      for (int i = 0; i < StrMatTrans.rows(); i++) 
-      {
-        for (typename MatrixType::InnerIterator it(StrMatTrans, i); it; ++it)
-          it.valueRef() = 0.0;
-      }
-      hasTranspose = true;
-    }
-    Out = (StrMatTrans + In).eval(); 
-  }
-
 }

 // This is the base class to interface with PaStiX functions. 
 // Users should not used this class directly. 
 template <class Derived>
-class PastixBase
+class PastixBase : internal::noncopyable
 {
  public:
    typedef typename internal::pastix_traits<Derived>::MatrixType _MatrixType;
@@ -166,29 +141,19 @@ class PastixBase
    typedef typename MatrixType::RealScalar RealScalar;
    typedef typename MatrixType::Index Index;
    typedef Matrix<Scalar,Dynamic,1> Vector;
+    typedef SparseMatrix<Scalar, ColMajor> ColSpMatrix;
    
  public:
    
-    PastixBase():m_initisOk(false),m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false) 
+    PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false), m_pastixdata(0), m_size(0)
    {
-      m_pastixdata = 0;
-      m_hasTranspose = false;
-      PastixInit();
+      init();
    }
    
    ~PastixBase() 
    {
-       PastixDestroy();
+      clean();
    }
-    
-    // Initialize the Pastix data structure, check the matrix
-    void PastixInit(); 
-    
-    // Compute the ordering and the symbolic factorization
-    Derived& analyzePattern (MatrixType& mat);
-    
-    // Compute the numerical factorization
-    Derived& factorize (MatrixType& mat);

    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
      *
@@ -269,7 +234,6 @@ class PastixBase
    /** Return a reference to a particular index parameter of the DPARM vector 
     * \sa dparm()
     */
-    
    double& dparm(int idxparam)
    {
      return m_dparm(idxparam);
@@ -307,17 +271,27 @@ class PastixBase
    }
    
  protected:
+
+    // Initialize the Pastix data structure, check the matrix
+    void init(); 
+    
+    // Compute the ordering and the symbolic factorization
+    void analyzePattern(ColSpMatrix& mat);
+    
+    // Compute the numerical factorization
+    void factorize(ColSpMatrix& mat);
+    
    // Free all the data allocated by Pastix
-    void PastixDestroy()
+    void clean()
    {
      eigen_assert(m_initisOk && "The Pastix structure should be allocated first"); 
      m_iparm(IPARM_START_TASK) = API_TASK_CLEAN;
      m_iparm(IPARM_END_TASK) = API_TASK_CLEAN;
-      internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
-               m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 1, m_iparm.data(), m_dparm.data());
+      internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,
+                             m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
    }
    
-    Derived& compute (MatrixType& mat);
+    void compute(ColSpMatrix& mat);
    
    int m_initisOk; 
    int m_analysisIsOk;
@@ -325,22 +299,12 @@ class PastixBase
    bool m_isInitialized;
    mutable ComputationInfo m_info; 
    mutable pastix_data_t *m_pastixdata; // Data structure for pastix
-    mutable SparseMatrix<Scalar, ColMajor> m_mat_null; // An input  null matrix
-    mutable Matrix<Scalar, Dynamic,1> m_vec_null; // An input null vector
-    mutable SparseMatrix<Scalar, ColMajor> m_StrMatTrans; // The transpose pattern of the input matrix
-    mutable bool m_hasTranspose; // The transpose of the current matrix has already been computed
    mutable int m_comm; // The MPI communicator identifier
-    mutable Matrix<Index,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
+    mutable Matrix<int,IPARM_SIZE,1> m_iparm; // integer vector for the input parameters
    mutable Matrix<double,DPARM_SIZE,1> m_dparm; // Scalar vector for the input parameters
    mutable Matrix<Index,Dynamic,1> m_perm;  // Permutation vector
    mutable Matrix<Index,Dynamic,1> m_invp;  // Inverse permutation vector
-    mutable int m_ordering; // ordering method to use
-    mutable int m_amalgamation; // level of amalgamation
    mutable int m_size; // Size of the matrix 
-    
-  private:
-    PastixBase(PastixBase& ) {}
-    
 }; 

 /** Initialize the PaStiX data structure. 
@@ -348,29 +312,29 @@ class PastixBase
   * \sa iparm() dparm()
   */
 template <class Derived>
-void PastixBase<Derived>::PastixInit()
+void PastixBase<Derived>::init()
 {
  m_size = 0; 
-  m_iparm.resize(IPARM_SIZE);
-  m_dparm.resize(DPARM_SIZE);
+  m_iparm.setZero(IPARM_SIZE);
+  m_dparm.setZero(DPARM_SIZE);
  
  m_iparm(IPARM_MODIFY_PARAMETER) = API_NO;
-  if(m_pastixdata)
-  { // This trick is used to reset the Pastix internal data between successive
-    // calls with (structural) different matrices
-    PastixDestroy();
-    m_pastixdata = 0;
-    m_iparm(IPARM_MODIFY_PARAMETER) = API_YES;
-    m_hasTranspose = false; 
-  }
+  pastix(&m_pastixdata, MPI_COMM_WORLD,
+         0, 0, 0, 0,
+         0, 0, 0, 1, m_iparm.data(), m_dparm.data());
+  
+  m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
+  m_iparm[IPARM_VERBOSE]             = 2;
+  m_iparm[IPARM_ORDERING]            = API_ORDER_SCOTCH;
+  m_iparm[IPARM_INCOMPLETE]          = API_NO;
+  m_iparm[IPARM_OOC_LIMIT]           = 2000;
+  m_iparm[IPARM_RHS_MAKING]          = API_RHS_B;
+  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
  
  m_iparm(IPARM_START_TASK) = API_TASK_INIT;
  m_iparm(IPARM_END_TASK) = API_TASK_INIT;
-  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
-  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
-               m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 1, m_iparm.data(), m_dparm.data());
-  
-  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
+  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0,
+                         0, 0, 0, 0, m_iparm.data(), m_dparm.data());
  
  // Check the returned error
  if(m_iparm(IPARM_ERROR_NUMBER)) {
@@ -384,82 +348,74 @@ void PastixBase<Derived>::PastixInit()
 }

 template <class Derived>
-Derived& PastixBase<Derived>::compute(MatrixType& mat)
+void PastixBase<Derived>::compute(ColSpMatrix& mat)
 {
  eigen_assert(mat.rows() == mat.cols() && "The input matrix should be squared");
-  typedef typename MatrixType::Scalar Scalar;
  
-  // Save the size of the current matrix 
-  m_size = mat.rows();
-  // Convert the matrix in fortran-style numbering 
-  internal::EigenToFortranNumbering(mat);
-  analyzePattern(mat);
+  analyzePattern(mat);  
  factorize(mat);
+  
  m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
-  if (m_factorizationIsOk) m_isInitialized = true;
-  
-  //Convert back the matrix -- Is it really necessary here 
-  internal::EigenToCNumbering(mat);
-  
-  return derived();
+  m_isInitialized = m_factorizationIsOk;
 }


 template <class Derived>
-Derived& PastixBase<Derived>::analyzePattern(MatrixType& mat)
-{
-  eigen_assert(m_initisOk && "PastixInit should be called first to set the default  parameters");
+void PastixBase<Derived>::analyzePattern(ColSpMatrix& mat)
+{                         
+  eigen_assert(m_initisOk && "The initialization of PaSTiX failed");
+  
+  // clean previous calls
+  if(m_size>0)
+    clean();
+  
  m_size = mat.rows();
  m_perm.resize(m_size);
  m_invp.resize(m_size);
  
-  // Convert the matrix in fortran-style numbering 
-  internal::EigenToFortranNumbering(mat);
-  
  m_iparm(IPARM_START_TASK) = API_TASK_ORDERING;
  m_iparm(IPARM_END_TASK) = API_TASK_ANALYSE;
-  
  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),
-               mat.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 0, m_iparm.data(), m_dparm.data());
+               mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
  
  // Check the returned error
-  if(m_iparm(IPARM_ERROR_NUMBER)) {
+  if(m_iparm(IPARM_ERROR_NUMBER))
+  {
    m_info = NumericalIssue;
    m_analysisIsOk = false;
  }
-  else { 
+  else
+  { 
    m_info = Success;
    m_analysisIsOk = true;
  }
-  return derived();
 }

 template <class Derived>
-Derived& PastixBase<Derived>::factorize(MatrixType& mat)
+void PastixBase<Derived>::factorize(ColSpMatrix& mat)
 {
+//   if(&m_cpyMat != &mat) m_cpyMat = mat;
  eigen_assert(m_analysisIsOk && "The analysis phase should be called before the factorization phase");
  m_iparm(IPARM_START_TASK) = API_TASK_NUMFACT;
  m_iparm(IPARM_END_TASK) = API_TASK_NUMFACT;
  m_size = mat.rows();
  
-  // Convert the matrix in fortran-style numbering 
-  internal::EigenToFortranNumbering(mat);
-  
  internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(),
-               mat.valuePtr(), m_perm.data(), m_invp.data(), m_vec_null.data(), 0, m_iparm.data(), m_dparm.data());
+               mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data());
  
  // Check the returned error
-  if(m_iparm(IPARM_ERROR_NUMBER)) {
+  if(m_iparm(IPARM_ERROR_NUMBER))
+  {
    m_info = NumericalIssue;
    m_factorizationIsOk = false;
    m_isInitialized = false;
  }
-  else { 
+  else
+  {
    m_info = Success;
    m_factorizationIsOk = true;
    m_isInitialized = true;
  }
-  return derived();
 }

 /* Solve the system */
@@ -475,20 +431,17 @@ bool PastixBase<Base>::_solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) co
  x = b; /* on return, x is overwritten by the computed solution */
  
  for (int i = 0; i < b.cols(); i++){
-    m_iparm(IPARM_START_TASK) = API_TASK_SOLVE;
-    m_iparm(IPARM_END_TASK) = API_TASK_REFINE;
-    m_iparm(IPARM_RHS_MAKING) = API_RHS_B;
-    internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, x.rows(), m_mat_null.outerIndexPtr(), m_mat_null.innerIndexPtr(),
-               m_mat_null.valuePtr(), m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data());
+    m_iparm[IPARM_START_TASK]          = API_TASK_SOLVE;
+    m_iparm[IPARM_END_TASK]            = API_TASK_REFINE;
+  
+    internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, x.rows(), 0, 0, 0,
+                           m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data());
  }
+  
  // Check the returned error
-  if(m_iparm(IPARM_ERROR_NUMBER)) {
-    m_info = NumericalIssue; 
-    return false;
-  }
-  else {
-    return true;
-  }
+  m_info = m_iparm(IPARM_ERROR_NUMBER)==0 ? Success : NumericalIssue;
+  
+  return m_iparm(IPARM_ERROR_NUMBER)==0;
 }

 /** \ingroup PaStiXSupport_Module
@@ -516,14 +469,18 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
  public:
    typedef _MatrixType MatrixType;
    typedef PastixBase<PastixLU<MatrixType> > Base;
-    typedef typename MatrixType::Scalar Scalar;
-    typedef SparseMatrix<Scalar, ColMajor> PaStiXType; 
+    typedef typename Base::ColSpMatrix ColSpMatrix;
+    typedef typename MatrixType::Index Index;
    
  public:
-    PastixLU():Base() {}
+    PastixLU() : Base()
+    {
+      init();
+    }
    
    PastixLU(const MatrixType& matrix):Base()
    {
+      init();
      compute(matrix);
    }
    /** Compute the LU supernodal factorization of \p matrix. 
@@ -533,18 +490,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
      */
    void compute (const MatrixType& matrix)
    {
-      // Pastix supports only column-major matrices with a symmetric pattern
-      Base::PastixInit(); 
-      PaStiXType temp(matrix.rows(), matrix.cols());
-      // Symmetrize the graph of the matrix
-      if (IsStrSym)   
-        temp = matrix;
-      else 
-      { 
-        internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans, m_hasTranspose);
-      }
-      m_iparm[IPARM_SYM] = API_SYM_NO;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+      m_structureIsUptodate = false;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::compute(temp);
    }
    /** Compute the LU symbolic factorization of \p matrix using its sparsity pattern. 
@@ -554,20 +502,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
      */
    void analyzePattern(const MatrixType& matrix)
    {
-      
-      Base::PastixInit(); 
-      /* Pastix supports only column-major matrices with symmetrized patterns */
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      // Symmetrize the graph of the matrix
-      if (IsStrSym)   
-        temp = matrix;
-      else 
-      { 
-        internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans,m_hasTranspose);
-      }
-      
-      m_iparm(IPARM_SYM) = API_SYM_NO;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+      m_structureIsUptodate = false;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::analyzePattern(temp);
    }

@@ -578,27 +515,48 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> >
      */ 
    void factorize(const MatrixType& matrix)
    {
-      /* Pastix supports only column-major matrices with symmetrized patterns */
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      // Symmetrize the graph of the matrix
-      if (IsStrSym)   
-        temp = matrix;
-      else 
-      { 
-        internal::EigenSymmetrizeMatrixGraph<PaStiXType>(matrix, temp, m_StrMatTrans,m_hasTranspose);
-      }
-      m_iparm(IPARM_SYM) = API_SYM_NO;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::factorize(temp);
    }
  protected:
+    
+    void init()
+    {
+      m_structureIsUptodate = false;
+      m_iparm(IPARM_SYM) = API_SYM_NO;
+      m_iparm(IPARM_FACTORIZATION) = API_FACT_LU;
+    }
+    
+    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
+    {
+      if(IsStrSym)
+        out = matrix;
+      else
+      {
+        if(!m_structureIsUptodate)
+        {
+          // update the transposed structure
+          m_transposedStructure = matrix.transpose();
+          
+          // Set the elements of the matrix to zero 
+          for (Index j=0; j<m_transposedStructure.outerSize(); ++j) 
+            for(typename ColSpMatrix::InnerIterator it(m_transposedStructure, j); it; ++it)
+              it.valueRef() = 0.0;
+
+          m_structureIsUptodate = true;
+        }
+        
+        out = m_transposedStructure + matrix;
+      }
+      internal::c_to_fortran_numbering(out);
+    }
+    
    using Base::m_iparm;
    using Base::m_dparm;
-    using Base::m_StrMatTrans;
-    using Base::m_hasTranspose;
    
-  private:
-    PastixLU(PastixLU& ) {}
+    ColSpMatrix m_transposedStructure;
+    bool m_structureIsUptodate;
 };

 /** \ingroup PaStiXSupport_Module
@@ -621,15 +579,18 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
  public:
    typedef _MatrixType MatrixType;
    typedef PastixBase<PastixLLT<MatrixType, _UpLo> > Base;
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename MatrixType::Index Index;
+    typedef typename Base::ColSpMatrix ColSpMatrix;
    
  public:
    enum { UpLo = _UpLo };
-    PastixLLT():Base() {}
+    PastixLLT() : Base()
+    {
+      init();
+    }
    
    PastixLLT(const MatrixType& matrix):Base()
    {
+      init();
      compute(matrix);
    }

@@ -638,13 +599,8 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
      */
    void compute (const MatrixType& matrix)
    {
-      // Pastix supports only lower, column-major matrices
-      Base::PastixInit(); // This is necessary to let PaStiX initialize its data structure between successive calls to compute
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::compute(temp);
    }

@@ -654,13 +610,8 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
      */
    void analyzePattern(const MatrixType& matrix)
    {
-      Base::PastixInit(); 
-      // Pastix supports only lower, column-major matrices
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::analyzePattern(temp);
    }
      /** Compute the LL^T supernodal numerical factorization of \p matrix 
@@ -668,19 +619,25 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
        */
    void factorize(const MatrixType& matrix)
    {
-      // Pastix supports only lower, column-major matrices 
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::factorize(temp);
    }
  protected:
    using Base::m_iparm;
    
-  private:
-    PastixLLT(PastixLLT& ) {}
+    void init()
+    {
+      m_iparm(IPARM_SYM) = API_SYM_YES;
+      m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT;
+    }
+    
+    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
+    {
+      // Pastix supports only lower, column-major matrices 
+      out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
+      internal::c_to_fortran_numbering(out);
+    }
 };

 /** \ingroup PaStiXSupport_Module
@@ -700,18 +657,21 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> >
 template<typename _MatrixType, int _UpLo>
 class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
 {
-public:
+  public:
    typedef _MatrixType MatrixType;
    typedef PastixBase<PastixLDLT<MatrixType, _UpLo> > Base; 
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename MatrixType::Index Index;
+    typedef typename Base::ColSpMatrix ColSpMatrix;
    
  public:
    enum { UpLo = _UpLo };
-    PastixLDLT():Base() {}
+    PastixLDLT():Base()
+    {
+      init();
+    }
    
    PastixLDLT(const MatrixType& matrix):Base()
    {
+      init();
      compute(matrix);
    }

@@ -720,13 +680,8 @@ public:
      */
    void compute (const MatrixType& matrix)
    {
-      Base::PastixInit();
-      // Pastix supports only lower, column-major matrices
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::compute(temp);
    }

@@ -736,14 +691,8 @@ public:
      */
    void analyzePattern(const MatrixType& matrix)
    { 
-      Base::PastixInit();
-      // Pastix supports only lower, column-major matrices
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-    
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::analyzePattern(temp);
    }
    /** Compute the LDL^T supernodal numerical factorization of \p matrix 
@@ -751,21 +700,26 @@ public:
      */
    void factorize(const MatrixType& matrix)
    {
-      // Pastix supports only lower, column-major matrices
-      SparseMatrix<Scalar, ColMajor> temp(matrix.rows(), matrix.cols());
-      PermutationMatrix<Dynamic,Dynamic,Index> pnull;
-      temp.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>().twistedBy(pnull);
-
-      m_iparm(IPARM_SYM) = API_SYM_YES;
-      m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+      ColSpMatrix temp;
+      grabMatrix(matrix, temp);
      Base::factorize(temp);
    }

  protected:
    using Base::m_iparm;
    
-  private:
-    PastixLDLT(PastixLDLT& ) {}
+    void init()
+    {
+      m_iparm(IPARM_SYM) = API_SYM_YES;
+      m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT;
+    }
+    
+    void grabMatrix(const MatrixType& matrix, ColSpMatrix& out)
+    {
+      // Pastix supports only lower, column-major matrices 
+      out.template selfadjointView<Lower>() = matrix.template selfadjointView<UpLo>();
+      internal::c_to_fortran_numbering(out);
+    }
 };

 namespace internal {
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -76,6 +76,9 @@ enum SimplicialCholeskyMode {
  * These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are
  * selfadjoint and positive definite. The factorization allows for solving A.X = B where
  * X and B can be either dense or sparse.
+  * 
+  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+  * such that the factorized matrix is P A P^-1.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -208,7 +211,7 @@ class SimplicialCholeskyBase : internal::noncopyable
        return;

      if(m_P.size()>0)
-        dest = m_Pinv * b;
+        dest = m_P * b;
      else
        dest = b;

@@ -222,7 +225,7 @@ class SimplicialCholeskyBase : internal::noncopyable
        derived().matrixU().solveInPlace(dest);

      if(m_P.size()>0)
-        dest = m_P * dest;
+        dest = m_Pinv * dest;
    }

    /** \internal */
@@ -268,7 +271,7 @@ class SimplicialCholeskyBase : internal::noncopyable
      eigen_assert(a.rows()==a.cols());
      int size = a.cols();
      CholMatrixType ap(size,size);
-      ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_Pinv);
+      ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
      factorize_preordered<DoLDLT>(ap);
    }

@@ -358,6 +361,9 @@ template<typename _MatrixType, int _UpLo> struct traits<SimplicialCholesky<_Matr
  * This class provides a LL^T Cholesky factorizations of sparse matrices that are
  * selfadjoint and positive definite. The factorization allows for solving A.X = B where
  * X and B can be either dense or sparse.
+  * 
+  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+  * such that the factorized matrix is P A P^-1.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -443,6 +449,9 @@ public:
  * This class provides a LDL^T Cholesky factorizations without square root of sparse matrices that are
  * selfadjoint and positive definite. The factorization allows for solving A.X = B where
  * X and B can be either dense or sparse.
+  * 
+  * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization
+  * such that the factorized matrix is P A P^-1.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
@@ -628,7 +637,7 @@ public:
        return;

      if(Base::m_P.size()>0)
-        dest = Base::m_Pinv * b;
+        dest = Base::m_P * b;
      else
        dest = b;

@@ -652,7 +661,7 @@ public:
      }

      if(Base::m_P.size()>0)
-        dest = Base::m_P * dest;
+        dest = Base::m_Pinv * dest;
    }
    
    Scalar determinant() const
@@ -678,22 +687,23 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
  // TODO allows to configure the permutation
+  // Note that amd compute the inverse permutation
  {
    CholMatrixType C;
    C = a.template selfadjointView<UpLo>();
    // remove diagonal entries:
    // seems not to be needed
    // C.prune(keep_diag());
-    internal::minimum_degree_ordering(C, m_P);
+    internal::minimum_degree_ordering(C, m_Pinv);
  }

-  if(m_P.size()>0)
-    m_Pinv  = m_P.inverse();
+  if(m_Pinv.size()>0)
+    m_P = m_Pinv.inverse();
  else
-    m_Pinv.resize(0);
+    m_P.resize(0);

  ap.resize(size,size);
-  ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_Pinv);
+  ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
 }

 template<typename Derived>
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -372,7 +372,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
    const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type
    operator*(const MatrixBase<OtherDerived> &other) const;
    
-     /** \returns an expression of P^-1 H P */
+     /** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */
    SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
    {
      return SparseSymmetricPermutationProduct<Derived,Upper|Lower>(derived(), perm);
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -125,7 +125,7 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
      _dest = tmp;
    }
    
-    /** \returns an expression of P^-1 H P */
+    /** \returns an expression of P H P^-1 */
    SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
    {
      return SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo>(m_matrix, perm);
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -36,7 +36,7 @@
 #   define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
 # endif
 # include <windows.h>
-#elif __APPLE__
+#elif defined(__APPLE__)
 #include <CoreServices/CoreServices.h>
 #include <mach/mach_time.h>
 #else
--- a/bench/btl/data/action_settings.txt
+++ b/bench/btl/data/action_settings.txt
@@ -5,6 +5,7 @@ axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
 axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
 matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:3000
 matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:3000
+trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:3000
 trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:3000
 trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:3000
 cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:3000
--- a/bench/btl/data/go_mean
+++ b/bench/btl/data/go_mean
@@ -38,6 +38,7 @@ source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
 source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
 source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
 # source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix
 source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix
 source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix
 source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix
--- a/bench/btl/libs/eigen3/eigen3_interface.hh
+++ b/bench/btl/libs/eigen3/eigen3_interface.hh
@@ -195,16 +195,16 @@ public :
  }

  static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
-    X = L.template triangularView<Lower>().solve(B);
+    X = L.template triangularView<Upper>().solve(B);
  }

  static inline void trmm(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
-    X = L.template triangularView<Lower>() * B;
+    X.noalias() = L.template triangularView<Lower>() * B;
  }

  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
    C = X;
-    internal::llt_inplace<Lower>::blocked(C);
+    internal::llt_inplace<real,Lower>::blocked(C);
    //C = X.llt().matrixL();
 //     C = X;
 //     Cholesky<gene_matrix>::computeInPlace(C);
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h
@@ -81,7 +81,7 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal
 int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha,  RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
 {
 //   std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
-  typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex);
+  typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
  static functype func[32];

  static bool init = false;
@@ -143,11 +143,17 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
    return xerbla_(SCALAR_SUFFIX_UP"TRSM ",&info,6);

  int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4);
-
+  
  if(SIDE(*side)==LEFT)
-    func[code](*m, *n, a, *lda, b, *ldb);
+  {
+    internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m);
+    func[code](*m, *n, a, *lda, b, *ldb, blocking);
+  }
  else
-    func[code](*n, *m, a, *lda, b, *ldb);
+  {
+    internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n);
+    func[code](*n, *m, a, *lda, b, *ldb, blocking);
+  }

  if(alpha!=Scalar(1))
    matrix(b,*m,*n,*ldb) *= alpha;
--- a/doc/I09_Vectorization.dox
+++ b/doc/I09_Vectorization.dox
@@ -1,6 +1,6 @@
 namespace Eigen {

-/** \page TopicVectorization Vectorizaion
+/** \page TopicVectorization Vectorization


 TODO: write this dox page!
--- a/doc/TopicMultithreading.dox
+++ b/doc/TopicMultithreading.dox
@@ -0,0 +1,46 @@
+namespace Eigen {
+
+/** \page TopicMultiThreading Eigen and multi-threading
+
+\section TopicMultiThreading_MakingEigenMT Make Eigen run in parallel
+
+Some Eigen's algorithms can exploit the multiple cores present in your hardware. To this end, it is enough to enable OpenMP on your compiler, for instance:
+ * GCC: \c -fopenmp
+ * ICC: \c -openmp
+ * MSVC: check the respective option in the build properties.
+You can control the number of thread that will be used using either the OpenMP API or Eiegn's API using the following priority:
+\code
+ OMP_NUM_THREADS=n ./my_program
+ omp_set_num_threads(n);
+ Eigen::setNbThreads(n);
+\endcode
+Unless setNbThreads has been called, Eigen uses the number of threads specified by OpenMP. You can restore this bahavior by calling \code setNbThreads(0); \endcode
+You can query the number of threads that will be used with:
+\code
+n = Eigen::nbThreads(n);
+\endcode
+You can disable Eigen's multi threading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token.
+
+Currently, the following algorithms can make use of multi-threading:
+ * general matrix - matrix products
+ * PartialPivLU
+
+\section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application
+
+In the case your own application is multithreaded, and multiple threads make calls to Eigen, then you have to initialize Eigen by calling the following routine \b before creating the threads:
+\code
+#include <Eigen/Core>
+
+int main(int argc, char** argv)
+{
+  Eigen::initParallel();
+  
+  ...
+}
+\endcode
+
+In the case your application is parallelized with OpenMP, you might want to disable Eigen's own parallization as detailed in the previous section.
+
+*/
+
+}
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -18,13 +18,15 @@ set(LAPACK_FOUND TRUE)
 set(BLAS_LIBRARIES eigen_blas)
 set(LAPACK_LIBRARIES eigen_lapack)

-if(TEST_REAL_CASES)
+set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of realword sparse matrices contained in the specified path")
+if(EIGEN_TEST_MATRIX_DIR)
  if(NOT WIN32)
-    add_definitions( -DTEST_REAL_CASES="${TEST_REAL_CASES}" )
+    message(STATUS "Test realworld sparse matrices: ${EIGEN_TEST_MATRIX_DIR}")
+    add_definitions( -DTEST_REAL_CASES="${EIGEN_TEST_MATRIX_DIR}" )
  else(NOT WIN32)
-    message(STATUS, "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
+    message(STATUS "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
  endif(NOT WIN32)
-endif(TEST_REAL_CASES)
+endif(EIGEN_TEST_MATRIX_DIR)

 set(SPARSE_LIBS " ")

--- a/test/nomalloc.cpp
+++ b/test/nomalloc.cpp
@@ -129,13 +129,20 @@ void ctms_decompositions()
                        0,
                        maxSize, maxSize> ComplexMatrix;

-  const Matrix A(Matrix::Random(size, size));
+  const Matrix A(Matrix::Random(size, size)), B(Matrix::Random(size, size));
+  Matrix X(size,size);
  const ComplexMatrix complexA(ComplexMatrix::Random(size, size));
  const Matrix saA = A.adjoint() * A;
+  const Vector b(Vector::Random(size));
+  Vector x(size);

  // Cholesky module
  Eigen::LLT<Matrix>  LLT;  LLT.compute(A);
+  X = LLT.solve(B);
+  x = LLT.solve(b);
  Eigen::LDLT<Matrix> LDLT; LDLT.compute(A);
+  X = LDLT.solve(B);
+  x = LDLT.solve(b);

  // Eigenvalues module
  Eigen::HessenbergDecomposition<ComplexMatrix> hessDecomp;        hessDecomp.compute(complexA);
@@ -147,12 +154,22 @@ void ctms_decompositions()

  // LU module
  Eigen::PartialPivLU<Matrix> ppLU; ppLU.compute(A);
+  X = ppLU.solve(B);
+  x = ppLU.solve(b);
  Eigen::FullPivLU<Matrix>    fpLU; fpLU.compute(A);
+  X = fpLU.solve(B);
+  x = fpLU.solve(b);

  // QR module
  Eigen::HouseholderQR<Matrix>        hQR;  hQR.compute(A);
+  X = hQR.solve(B);
+  x = hQR.solve(b);
  Eigen::ColPivHouseholderQR<Matrix>  cpQR; cpQR.compute(A);
+  X = cpQR.solve(B);
+  x = cpQR.solve(b);
  Eigen::FullPivHouseholderQR<Matrix> fpQR; fpQR.compute(A);
+  X = fpQR.solve(B);
+  x = fpQR.solve(b);

  // SVD module
  Eigen::JacobiSVD<Matrix> jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV);
Author	SHA1	Message	Date
Gael Guennebaud	4ca5735de4	bump to 3.1.0-rc1	2012-06-14 21:25:50 +02:00
Gael Guennebaud	b9f25ee656	bug #466 : better fix for the race condition: this new patch add an initParallel() function which must be called at the initialization time of any multi-threaded application calling Eigen from multiple threads.	2012-06-14 14:24:15 +02:00
Gael Guennebaud	a3e700db72	fix bug #475 : .exp() now returns +inf when overflow occurs (SSE)	2012-06-14 10:38:39 +02:00
Gael Guennebaud	324ecf153b	disable the MKL's vm*powx functions on windows	2012-06-14 09:49:57 +02:00
Gael Guennebaud	9c7b62415a	simplify and clean a bit the Pastix support module	2012-06-12 16:47:14 +02:00
Gael Guennebaud	4e8523b835	update blas interface for trsm	2012-06-12 14:33:03 +02:00
Gael Guennebaud	88e051019b	extend nomalloc unit test to test the solve calls	2012-06-12 13:12:47 +02:00
Gael Guennebaud	cd48254a87	fix inclusion order	2012-06-12 11:40:33 +02:00
Gael Guennebaud	924c7a9300	avoid dynamic allocation for fixed size triangular solving	2012-06-12 11:33:50 +02:00
Gael Guennebaud	bc580bbffb	fix typo	2012-06-11 18:49:30 +02:00
Gael Guennebaud	f2849fac20	Fix bug #466 : race condition destected by helgrind in manage_caching_sizes. After all, the solution based on threadprivate is not that costly.	2012-06-08 17:29:02 +02:00
Gael Guennebaud	28d0a8580e	workaround ICC 11.1 compilation issue	2012-06-08 14:13:28 +02:00
Gael Guennebaud	7e36d32b32	fix ambiguous calls in the functors by prefixing function calls with internal::	2012-06-08 09:53:50 +02:00
Gael Guennebaud	5cec86cb1e	BTL: add missing TRMM plots, update Eigen's interface	2012-06-07 18:35:38 +02:00
Gael Guennebaud	512e0b151b	clean the support for testing existing sparse problems	2012-06-07 18:31:09 +02:00
Gael Guennebaud	83c932ed15	fix a warning	2012-06-07 18:22:13 +02:00
Gael Guennebaud	1e5e66b642	For consistency, Simplicial* now factorizes P A P^-1 (instead of P^-1 A P). Document how is applied the permutation in Simplicial* .	2012-06-07 16:24:46 +02:00
Gael Guennebaud	63c6ab3e42	fix documentaion of twistedBy	2012-06-07 16:18:00 +02:00
Gael Guennebaud	c1edb7fd95	Added tag 3.1.0-beta1 for changeset `b7a7285909`	2012-06-06 22:34:08 +02:00