bump to 3.2.4

bug #329 : fix typo
(grafted from b9d314ae19 )
2026-04-10 11:34:33 +08:00 · 2015-01-21 17:26:41 +01:00 · 2015-01-17 21:55:33 +01:00 · 2015-01-14 22:06:09 +01:00 · 2015-01-13 10:11:44 +01:00 · 2015-01-13 09:25:24 +01:00
184 changed files with 3039 additions and 4571 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -204,7 +204,7 @@ if(NOT MSVC)

  option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
  if(EIGEN_TEST_NEON)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a"8)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
    message(STATUS "Enabling NEON in tests/examples")
  endif()

--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -4,14 +4,10 @@
 ## # The following are required to uses Dart and the Cdash dashboard
 ##   ENABLE_TESTING()
 ##   INCLUDE(CTest)
-set(CTEST_PROJECT_NAME "Eigen")
+set(CTEST_PROJECT_NAME "Eigen3.2")
 set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")

 set(CTEST_DROP_METHOD "http")
 set(CTEST_DROP_SITE "manao.inria.fr")
-set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
+set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.2")
 set(CTEST_DROP_SITE_CDASH TRUE)
-set(CTEST_PROJECT_SUBPROJECTS
-Official
-Unsupported
-)
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -95,7 +95,7 @@
    extern "C" {
      // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
      // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
-      #ifdef __INTEL_COMPILER
+      #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110
        #include <immintrin.h>
      #else
        #include <emmintrin.h>
@@ -165,7 +165,7 @@
 #endif

 // required for __cpuid, needs to be included after cmath
-#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64))
+#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE))
  #include <intrin.h>
 #endif

@@ -350,13 +350,6 @@ using std::ptrdiff_t;
 #include "src/Core/ArrayBase.h"
 #include "src/Core/ArrayWrapper.h"

-#ifdef EIGEN_ENABLE_EVALUATORS
-#include "src/Core/Product.h"
-#include "src/Core/CoreEvaluators.h"
-#include "src/Core/AssignEvaluator.h"
-#include "src/Core/ProductEvaluators.h"
-#endif
-
 #ifdef EIGEN_USE_BLAS
 #include "src/Core/products/GeneralMatrixMatrix_MKL.h"
 #include "src/Core/products/GeneralMatrixVector_MKL.h"
--- a/Eigen/Eigen2Support
+++ b/Eigen/Eigen2Support
@@ -14,12 +14,25 @@
 #error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
 #endif

+#ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING
+
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+#warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)"
+#else
+#pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)")
+#endif
+
+#endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING
+
 #include "src/Core/util/DisableStupidWarnings.h"

 /** \ingroup Support_modules
  * \defgroup Eigen2Support_Module Eigen2 support module
-  * This module provides a couple of deprecated functions improving the compatibility with Eigen2.
  *
+  * \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3.
+  *
+  * This module provides a couple of deprecated functions improving the compatibility with Eigen2.
+  * 
  * To use it, define EIGEN2_SUPPORT before including any Eigen header
  * \code
  * #define EIGEN2_SUPPORT
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -16,7 +16,10 @@
 namespace Eigen { 

 namespace internal {
-template<typename MatrixType, int UpLo> struct LDLT_Traits;
+  template<typename MatrixType, int UpLo> struct LDLT_Traits;
+
+  // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef
+  enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };
 }

 /** \ingroup Cholesky_Module
@@ -69,7 +72,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * The default constructor is useful in cases in which the user intends to
      * perform decompositions via LDLT::compute(const MatrixType&).
      */
-    LDLT() : m_matrix(), m_transpositions(), m_isInitialized(false) {}
+    LDLT() 
+      : m_matrix(), 
+        m_transpositions(), 
+        m_sign(internal::ZeroSign),
+        m_isInitialized(false) 
+    {}

    /** \brief Default Constructor with memory preallocation
      *
@@ -81,6 +89,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
      : m_matrix(size, size),
        m_transpositions(size),
        m_temporary(size),
+        m_sign(internal::ZeroSign),
        m_isInitialized(false)
    {}

@@ -93,6 +102,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
      : m_matrix(matrix.rows(), matrix.cols()),
        m_transpositions(matrix.rows()),
        m_temporary(matrix.rows()),
+        m_sign(internal::ZeroSign),
        m_isInitialized(false)
    {
      compute(matrix);
@@ -139,7 +149,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
    inline bool isPositive() const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == 1;
+      return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
    }
    
    #ifdef EIGEN2_SUPPORT
@@ -153,7 +163,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
    inline bool isNegative(void) const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == -1;
+      return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;
    }

    /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
@@ -235,7 +245,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
    MatrixType m_matrix;
    TranspositionType m_transpositions;
    TmpMatrixType m_temporary;
-    int m_sign;
+    internal::SignMatrix m_sign;
    bool m_isInitialized;
 };

@@ -246,7 +256,7 @@ template<int UpLo> struct ldlt_inplace;
 template<> struct ldlt_inplace<Lower>
 {
  template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
+  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
  {
    using std::abs;
    typedef typename MatrixType::Scalar Scalar;
@@ -258,36 +268,19 @@ template<> struct ldlt_inplace<Lower>
    if (size <= 1)
    {
      transpositions.setIdentity();
-      if(sign)
-        *sign = numext::real(mat.coeff(0,0))>0 ? 1:-1;
+      if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
+      else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
+      else sign = ZeroSign;
      return true;
    }

-    RealScalar cutoff(0), biggest_in_corner;
-
    for (Index k = 0; k < size; ++k)
    {
      // Find largest diagonal element
      Index index_of_biggest_in_corner;
-      biggest_in_corner = mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
+      mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
      index_of_biggest_in_corner += k;

-      if(k == 0)
-      {
-        // The biggest overall is the point of reference to which further diagonals
-        // are compared; if any diagonal is negligible compared
-        // to the largest overall, the algorithm bails.
-        cutoff = abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
-      }
-
-      // Finish early if the matrix is not full rank.
-      if(biggest_in_corner < cutoff)
-      {
-        for(Index i = k; i < size; i++) transpositions.coeffRef(i) = i;
-        if(sign) *sign = 0;
-        break;
-      }
-
      transpositions.coeffRef(k) = index_of_biggest_in_corner;
      if(k != index_of_biggest_in_corner)
      {
@@ -318,22 +311,27 @@ template<> struct ldlt_inplace<Lower>

      if(k>0)
      {
-        temp.head(k) = mat.diagonal().head(k).asDiagonal() * A10.adjoint();
+        temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint();
        mat.coeffRef(k,k) -= (A10 * temp.head(k)).value();
        if(rs>0)
          A21.noalias() -= A20 * temp.head(k);
      }
-      if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff))
-        A21 /= mat.coeffRef(k,k);
      
-      if(sign)
-      {
-        // LDLT is not guaranteed to work for indefinite matrices, but let's try to get the sign right
-        int newSign = numext::real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0;
-        if(k == 0)
-          *sign = newSign;
-        else if(*sign != newSign)
-          *sign = 0;
+      // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
+      // was smaller than the cutoff value. However, soince LDLT is not rank-revealing
+      // we should only make sure we do not introduce INF or NaN values.
+      // LAPACK also uses 0 as the cutoff value.
+      RealScalar realAkk = numext::real(mat.coeffRef(k,k));
+      if((rs>0) && (abs(realAkk) > RealScalar(0)))
+        A21 /= realAkk;
+
+      if (sign == PositiveSemiDef) {
+        if (realAkk < 0) sign = Indefinite;
+      } else if (sign == NegativeSemiDef) {
+        if (realAkk > 0) sign = Indefinite;
+      } else if (sign == ZeroSign) {
+        if (realAkk > 0) sign = PositiveSemiDef;
+        else if (realAkk < 0) sign = NegativeSemiDef;
      }
    }

@@ -399,7 +397,7 @@ template<> struct ldlt_inplace<Lower>
 template<> struct ldlt_inplace<Upper>
 {
  template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
+  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
  {
    Transpose<MatrixType> matt(mat);
    return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
@@ -444,8 +442,9 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  m_transpositions.resize(size);
  m_isInitialized = false;
  m_temporary.resize(size);
+  m_sign = internal::ZeroSign;

-  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);
+  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);

  m_isInitialized = true;
  return *this;
@@ -473,7 +472,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
    for (Index i = 0; i < size; i++)
      m_transpositions.coeffRef(i) = i;
    m_temporary.resize(size);
-    m_sign = sigma>=0 ? 1 : -1;
+    m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
    m_isInitialized = true;
  }

@@ -504,16 +503,21 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
    using std::abs;
    using std::max;
    typedef typename LDLTType::MatrixType MatrixType;
-    typedef typename LDLTType::Scalar Scalar;
    typedef typename LDLTType::RealScalar RealScalar;
-    const Diagonal<const MatrixType> vectorD = dec().vectorD();
-    RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
-				 RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
+    const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD());
+    // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
+    // as motivated by LAPACK's xGELSS:
+    // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
+    // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
+    // diagonal element is not well justified and to numerical issues in some cases.
+    // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
+    RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
+    
    for (Index i = 0; i < vectorD.size(); ++i) {
      if(abs(vectorD(i)) > tolerance)
-	dst.row(i) /= vectorD(i);
+        dst.row(i) /= vectorD(i);
      else
-	dst.row(i).setZero();
+        dst.row(i).setZero();
    }

    // dst = L^-T (D^-1 L^-1 P b)
@@ -566,7 +570,7 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
  // L^* P
  res = matrixU() * res;
  // D(L^*P)
-  res = vectorD().asDiagonal() * res;
+  res = vectorD().real().asDiagonal() * res;
  // L(DL^*P)
  res = matrixL() * res;
  // P^T (LDL^*P)
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -58,10 +58,12 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
  res.p       = mat.outerIndexPtr();
  res.i       = mat.innerIndexPtr();
  res.x       = mat.valuePtr();
+  res.z       = 0;
  res.sorted  = 1;
  if(mat.isCompressed())
  {
    res.packed  = 1;
+    res.nz = 0;
  }
  else
  {
@@ -170,6 +172,7 @@ class CholmodBase : internal::noncopyable
    CholmodBase()
      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
    {
+      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
      cholmod_start(&m_cholmod);
    }

@@ -241,7 +244,7 @@ class CholmodBase : internal::noncopyable
      return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived());
    }
    
-    /** Performs a symbolic decomposition on the sparcity of \a matrix.
+    /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
      *
      * This function is particularly useful when solving for several problems having the same structure.
      * 
@@ -265,7 +268,7 @@ class CholmodBase : internal::noncopyable
    
    /** Performs a numeric decomposition of \a matrix
      *
-      * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed.
+      * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
      *
      * \sa analyzePattern()
      */
@@ -302,7 +305,7 @@ class CholmodBase : internal::noncopyable
      {
        this->m_info = NumericalIssue;
      }
-      // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.)
+      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
      dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
      cholmod_free_dense(&x_cd, &m_cholmod);
    }
@@ -323,7 +326,7 @@ class CholmodBase : internal::noncopyable
      {
        this->m_info = NumericalIssue;
      }
-      // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.)
+      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
      dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
      cholmod_free_sparse(&x_cs, &m_cholmod);
    }
@@ -365,8 +368,8 @@ class CholmodBase : internal::noncopyable
  *
  * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization
  * using the Cholmod library.
-  * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Thefore, it has little practical interest.
-  * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
+  * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest.
+  * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
@@ -412,8 +415,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
  *
  * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization
  * using the Cholmod library.
-  * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Thefore, it has little practical interest.
-  * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
+  * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest.
+  * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
@@ -458,7 +461,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
  * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization
  * using the Cholmod library.
  * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM.
-  * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
+  * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
@@ -501,7 +504,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
  * \brief A general Cholesky factorization and solver based on Cholmod
  *
  * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization
-  * using the Cholmod library. The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
+  * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * This variant permits to change the underlying Cholesky method at runtime.
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -210,7 +210,7 @@ class Array
      : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
    {
      Base::_check_template_params();
-      Base::resize(other.rows(), other.cols());
+      Base::_resize_to_match(other);
      *this = other;
    }

--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -29,6 +29,11 @@ struct traits<ArrayWrapper<ExpressionType> >
  : public traits<typename remove_all<typename ExpressionType::Nested>::type >
 {
  typedef ArrayXpr XprKind;
+  // Let's remove NestByRefBit
+  enum {
+    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+    Flags = Flags0 & ~NestByRefBit
+  };
 };
 }

@@ -149,6 +154,11 @@ struct traits<MatrixWrapper<ExpressionType> >
 : public traits<typename remove_all<typename ExpressionType::Nested>::type >
 {
  typedef MatrixXpr XprKind;
+  // Let's remove NestByRefBit
+  enum {
+    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+    Flags = Flags0 & ~NestByRefBit
+  };
 };
 }

--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -1,754 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_ASSIGN_EVALUATOR_H
-#define EIGEN_ASSIGN_EVALUATOR_H
-
-namespace Eigen {
-
-// This implementation is based on Assign.h
-
-namespace internal {
-  
-/***************************************************************************
-* Part 1 : the logic deciding a strategy for traversal and unrolling       *
-***************************************************************************/
-
-// copy_using_evaluator_traits is based on assign_traits
-
-template <typename Derived, typename OtherDerived>
-struct copy_using_evaluator_traits
-{
-public:
-  enum {
-    DstIsAligned = Derived::Flags & AlignedBit,
-    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
-    SrcIsAligned = OtherDerived::Flags & AlignedBit,
-    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
-    SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
-  };
-
-private:
-  enum {
-    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
-              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
-              : int(Derived::RowsAtCompileTime),
-    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
-              : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
-              : int(Derived::MaxRowsAtCompileTime),
-    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
-    PacketSize = packet_traits<typename Derived::Scalar>::size
-  };
-
-  enum {
-    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
-    MightVectorize = StorageOrdersAgree
-                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
-    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
-                       && int(DstIsAligned) && int(SrcIsAligned),
-    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
-    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
-                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
-      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
-         so it's only good for large enough sizes. */
-    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
-                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
-      /* slice vectorization can be slow, so we only want it if the slices are big, which is
-         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
-         in a fixed-size matrix */
-  };
-
-public:
-  enum {
-    Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal) 
-              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
-              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
-              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
-              : int(MayLinearize)        ? int(LinearTraversal)
-                                         : int(DefaultTraversal),
-    Vectorized = int(Traversal) == InnerVectorizedTraversal
-              || int(Traversal) == LinearVectorizedTraversal
-              || int(Traversal) == SliceVectorizedTraversal
-  };
-
-private:
-  enum {
-    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
-    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
-                       && int(OtherDerived::CoeffReadCost) != Dynamic
-                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
-    MayUnrollInner      = int(InnerSize) != Dynamic
-                       && int(OtherDerived::CoeffReadCost) != Dynamic
-                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
-  };
-
-public:
-  enum {
-    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
-                ? (
- 		    int(MayUnrollCompletely) ? int(CompleteUnrolling)
-                  : int(MayUnrollInner)      ? int(InnerUnrolling)
-                                             : int(NoUnrolling)
-                  )
-              : int(Traversal) == int(LinearVectorizedTraversal)
-                ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) 
-                                                                    : int(NoUnrolling) )
-              : int(Traversal) == int(LinearTraversal)
-                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
-                                              : int(NoUnrolling) )
-              : int(NoUnrolling)
-  };
-
-#ifdef EIGEN_DEBUG_ASSIGN
-  static void debug()
-  {
-    EIGEN_DEBUG_VAR(DstIsAligned)
-    EIGEN_DEBUG_VAR(SrcIsAligned)
-    EIGEN_DEBUG_VAR(JointAlignment)
-    EIGEN_DEBUG_VAR(InnerSize)
-    EIGEN_DEBUG_VAR(InnerMaxSize)
-    EIGEN_DEBUG_VAR(PacketSize)
-    EIGEN_DEBUG_VAR(StorageOrdersAgree)
-    EIGEN_DEBUG_VAR(MightVectorize)
-    EIGEN_DEBUG_VAR(MayLinearize)
-    EIGEN_DEBUG_VAR(MayInnerVectorize)
-    EIGEN_DEBUG_VAR(MayLinearVectorize)
-    EIGEN_DEBUG_VAR(MaySliceVectorize)
-    EIGEN_DEBUG_VAR(Traversal)
-    EIGEN_DEBUG_VAR(UnrollingLimit)
-    EIGEN_DEBUG_VAR(MayUnrollCompletely)
-    EIGEN_DEBUG_VAR(MayUnrollInner)
-    EIGEN_DEBUG_VAR(Unrolling)
-  }
-#endif
-};
-
-/***************************************************************************
-* Part 2 : meta-unrollers
-***************************************************************************/
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
-struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
-{
-  typedef typename DstEvaluatorType::XprType DstXprType;
-  
-  enum {
-    outer = Index / DstXprType::InnerSizeAtCompileTime,
-    inner = Index % DstXprType::InnerSizeAtCompileTime
-  };
-
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
-                                      SrcEvaluatorType &srcEvaluator)
-  {
-    dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
-      ::run(dstEvaluator, srcEvaluator);
-  }
-};
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
-};
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
-struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
-                                      SrcEvaluatorType &srcEvaluator, 
-                                      int outer)
-  {
-    dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
-    copy_using_evaluator_DefaultTraversal_InnerUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
-      ::run(dstEvaluator, srcEvaluator, outer);
-  }
-};
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
-struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
-                                      SrcEvaluatorType &srcEvaluator)
-  {
-    dstEvaluator.copyCoeff(Index, srcEvaluator);
-    copy_using_evaluator_LinearTraversal_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
-      ::run(dstEvaluator, srcEvaluator);
-  }
-};
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
-struct copy_using_evaluator_innervec_CompleteUnrolling
-{
-  typedef typename DstEvaluatorType::XprType DstXprType;
-  typedef typename SrcEvaluatorType::XprType SrcXprType;
-
-  enum {
-    outer = Index / DstXprType::InnerSizeAtCompileTime,
-    inner = Index % DstXprType::InnerSizeAtCompileTime,
-    JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
-  };
-
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
-                                      SrcEvaluatorType &srcEvaluator)
-  {
-    dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
-    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
-    copy_using_evaluator_innervec_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
-      ::run(dstEvaluator, srcEvaluator);
-  }
-};
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
-};
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
-struct copy_using_evaluator_innervec_InnerUnrolling
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
-                                      SrcEvaluatorType &srcEvaluator, 
-                                      int outer)
-  {
-    dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
-    typedef typename DstEvaluatorType::XprType DstXprType;
-    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
-    copy_using_evaluator_innervec_InnerUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
-      ::run(dstEvaluator, srcEvaluator, outer);
-  }
-};
-
-template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
-struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
-{
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
-};
-
-/***************************************************************************
-* Part 3 : implementation of all cases
-***************************************************************************/
-
-// copy_using_evaluator_impl is based on assign_impl
-
-template<typename DstXprType, typename SrcXprType,
-         int Traversal = copy_using_evaluator_traits<DstXprType, SrcXprType>::Traversal,
-         int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling>
-struct copy_using_evaluator_impl;
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling>
-{
-  static void run(DstXprType& dst, const SrcXprType& src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-    typedef typename DstXprType::Index Index;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    for(Index outer = 0; outer < dst.outerSize(); ++outer) {
-      for(Index inner = 0; inner < dst.innerSize(); ++inner) {
-	dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
-      }
-    }
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
-{
-  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
-      ::run(dstEvaluator, srcEvaluator);
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
-{
-  typedef typename DstXprType::Index Index;
-  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    const Index outerSize = dst.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer)
-      copy_using_evaluator_DefaultTraversal_InnerUnrolling
-        <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
-        ::run(dstEvaluator, srcEvaluator, outer);
-  }
-};
-
-/***************************
-*** Linear vectorization ***
-***************************/
-
-template <bool IsAligned = false>
-struct unaligned_copy_using_evaluator_impl
-{
-  // if IsAligned = true, then do nothing
-  template <typename SrcEvaluatorType, typename DstEvaluatorType>
-  static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, 
-                                      typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
-};
-
-template <>
-struct unaligned_copy_using_evaluator_impl<false>
-{
-  // MSVC must not inline this functions. If it does, it fails to optimize the
-  // packet access path.
-#ifdef _MSC_VER
-  template <typename DstEvaluatorType, typename SrcEvaluatorType>
-  static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, 
-                                    const SrcEvaluatorType &srcEvaluator,
-                                    typename DstEvaluatorType::Index start,
-                                    typename DstEvaluatorType::Index end)
-#else
-  template <typename DstEvaluatorType, typename SrcEvaluatorType>
-  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, 
-                                      const SrcEvaluatorType &srcEvaluator,
-                                      typename DstEvaluatorType::Index start,
-                                      typename DstEvaluatorType::Index end)
-#endif
-  {
-    for (typename DstEvaluatorType::Index index = start; index < end; ++index)
-      dstEvaluator.copyCoeff(index, srcEvaluator);
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
-{
-  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-    typedef typename DstXprType::Index Index;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    const Index size = dst.size();
-    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
-    enum {
-      packetSize = PacketTraits::size,
-      dstIsAligned = int(copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned),
-      dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
-      srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
-    };
-    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size);
-    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
-
-    unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);
-
-    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
-    {
-      dstEvaluator.template copyPacket<dstAlignment, srcAlignment>(index, srcEvaluator);
-    }
-
-    unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size);
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
-{
-  typedef typename DstXprType::Index Index;
-  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    enum { size = DstXprType::SizeAtCompileTime,
-           packetSize = packet_traits<typename DstXprType::Scalar>::size,
-           alignedSize = (size/packetSize)*packetSize };
-
-    copy_using_evaluator_innervec_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, 0, alignedSize>
-      ::run(dstEvaluator, srcEvaluator);
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, alignedSize, size>
-      ::run(dstEvaluator, srcEvaluator);
-  }
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, NoUnrolling>
-{
-  inline static void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-    typedef typename DstXprType::Index Index;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    const Index innerSize = dst.innerSize();
-    const Index outerSize = dst.outerSize();
-    const Index packetSize = packet_traits<typename DstXprType::Scalar>::size;
-    for(Index outer = 0; outer < outerSize; ++outer)
-      for(Index inner = 0; inner < innerSize; inner+=packetSize) {
-	dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, inner, srcEvaluator);
-      }
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
-{
-  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    copy_using_evaluator_innervec_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
-      ::run(dstEvaluator, srcEvaluator);
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
-{
-  typedef typename DstXprType::Index Index;
-  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    const Index outerSize = dst.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer)
-      copy_using_evaluator_innervec_InnerUnrolling
-        <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
-        ::run(dstEvaluator, srcEvaluator, outer);
-  }
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnrolling>
-{
-  inline static void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-    typedef typename DstXprType::Index Index;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    const Index size = dst.size();
-    for(Index i = 0; i < size; ++i)
-      dstEvaluator.copyCoeff(i, srcEvaluator);
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
-{
-  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    copy_using_evaluator_LinearTraversal_CompleteUnrolling
-      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
-      ::run(dstEvaluator, srcEvaluator);
-  }
-};
-
-/**************************
-*** Slice vectorization ***
-***************************/
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling>
-{
-  inline static void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-    typedef typename DstXprType::Index Index;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
-    enum {
-      packetSize = PacketTraits::size,
-      alignable = PacketTraits::AlignedOnScalar,
-      dstAlignment = alignable ? Aligned : int(copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned)
-    };
-    const Index packetAlignedMask = packetSize - 1;
-    const Index innerSize = dst.innerSize();
-    const Index outerSize = dst.outerSize();
-    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
-    Index alignedStart = ((!alignable) || copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
-                       : internal::first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
-
-    for(Index outer = 0; outer < outerSize; ++outer)
-    {
-      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
-      // do the non-vectorizable part of the assignment
-      for(Index inner = 0; inner<alignedStart ; ++inner) {
-        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
-      }
-
-      // do the vectorizable part of the assignment
-      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) {
-        dstEvaluator.template copyPacketByOuterInner<dstAlignment, Unaligned>(outer, inner, srcEvaluator);
-      }
-
-      // do the non-vectorizable part of the assignment
-      for(Index inner = alignedEnd; inner<innerSize ; ++inner) {
-        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
-      }
-
-      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
-    }
-  }
-};
-
-/****************************
-*** All-at-once traversal ***
-****************************/
-
-template<typename DstXprType, typename SrcXprType>
-struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoUnrolling>
-{
-  inline static void run(DstXprType &dst, const SrcXprType &src)
-  {
-    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-    DstEvaluatorType dstEvaluator(dst);
-    SrcEvaluatorType srcEvaluator(src);
-
-    // Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
-    // TODO: Do not pass the xpr object to evalTo()
-    srcEvaluator.evalTo(dstEvaluator, dst);
-  }
-};
-
-/***************************************************************************
-* Part 4 : Entry points
-***************************************************************************/
-
-// Based on DenseBase::LazyAssign()
-
-template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst, 
-                                       const EigenBase<SrcXprType>& src)
-{
-  return noalias_copy_using_evaluator(dst.expression(), src.derived());
-}
-
-template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
-struct AddEvalIfAssumingAliasing;
-
-template<typename XprType>
-struct AddEvalIfAssumingAliasing<XprType, 0>
-{
-  static const XprType& run(const XprType& xpr) 
-  {
-    return xpr;
-  }
-};
-
-template<typename XprType>
-struct AddEvalIfAssumingAliasing<XprType, 1>
-{
-  static const EvalToTemp<XprType> run(const XprType& xpr)
-  {
-    return EvalToTemp<XprType>(xpr);
-  }
-};
-
-template<typename DstXprType, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
-{
-  return noalias_copy_using_evaluator(dst.const_cast_derived(), 
-				      AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()));
-}
-
-template<typename DstXprType, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
-{
-#ifdef EIGEN_DEBUG_ASSIGN
-  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
-#endif
-#ifdef EIGEN_NO_AUTOMATIC_RESIZING
-  eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
-				  : (dst.rows() == src.rows() && dst.cols() == src.cols())))
-	       && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
-#else
-  dst.const_cast_derived().resizeLike(src.derived());
-#endif
-  return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived());
-}
-
-template<typename DstXprType, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
-{
-  return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived());
-}
-
-template<typename DstXprType, typename SrcXprType>
-const DstXprType& copy_using_evaluator_without_resizing(const DstXprType& dst, const SrcXprType& src)
-{
-#ifdef EIGEN_DEBUG_ASSIGN
-  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
-#endif
-  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-  copy_using_evaluator_impl<DstXprType, SrcXprType>::run(const_cast<DstXprType&>(dst), src);
-  return dst;
-}
-
-// Based on DenseBase::swap()
-// TODO: Chech whether we need to do something special for swapping two
-//       Arrays or Matrices.
-
-template<typename DstXprType, typename SrcXprType>
-void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
-{
-  copy_using_evaluator(SwapWrapper<DstXprType>(const_cast<DstXprType&>(dst)), src);
-}
-
-// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
-template<typename DstXprType, typename SrcXprType>
-void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
-  copy_using_evaluator(tmp, src.derived());
-}
-
-// Based on ArrayBase::operator+=
-template<typename DstXprType, typename SrcXprType>
-void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
-  copy_using_evaluator(tmp, src.derived());
-}
-
-// TODO: Add add_assign_using_evaluator for EigenBase ?
-
-template<typename DstXprType, typename SrcXprType>
-void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
-  copy_using_evaluator(tmp, src.derived());
-}
-
-template<typename DstXprType, typename SrcXprType>
-void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
-  copy_using_evaluator(tmp, src.derived());
-}
-
-template<typename DstXprType, typename SrcXprType>
-void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
-  copy_using_evaluator(tmp, src.derived());
-}
-
-template<typename DstXprType, typename SrcXprType>
-void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
-  copy_using_evaluator(tmp, src.derived());
-}
-
-
-} // namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_ASSIGN_EVALUATOR_H
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -81,7 +81,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
                       && (InnerStrideAtCompileTime == 1)
                        ? PacketAccessBit : 0,
    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
-    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
+    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
    Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -29,9 +29,9 @@ struct all_unroller
 };

 template<typename Derived>
-struct all_unroller<Derived, 1>
+struct all_unroller<Derived, 0>
 {
-  static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
+  static inline bool run(const Derived &/*mat*/) { return true; }
 };

 template<typename Derived>
@@ -55,9 +55,9 @@ struct any_unroller
 };

 template<typename Derived>
-struct any_unroller<Derived, 1>
+struct any_unroller<Derived, 0>
 {
-  static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
+  static inline bool run(const Derived & /*mat*/) { return false; }
 };

 template<typename Derived>
@@ -131,7 +131,7 @@ inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const

 /** \returns true is \c *this contains at least one Not A Number (NaN).
  *
-  * \sa isFinite()
+  * \sa allFinite()
  */
 template<typename Derived>
 inline bool DenseBase<Derived>::hasNaN() const
@@ -144,7 +144,7 @@ inline bool DenseBase<Derived>::hasNaN() const
  * \sa hasNaN()
  */
 template<typename Derived>
-inline bool DenseBase<Derived>::isFinite() const
+inline bool DenseBase<Derived>::allFinite() const
 {
  return !((derived()-derived()).hasNaN());
 }
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -43,6 +43,17 @@ struct CommaInitializer
    m_xpr.block(0, 0, other.rows(), other.cols()) = other;
  }

+  /* Copy/Move constructor which transfers ownership. This is crucial in 
+   * absence of return value optimization to avoid assertions during destruction. */
+  // FIXME in C++11 mode this could be replaced by a proper RValue constructor
+  inline CommaInitializer(const CommaInitializer& o)
+  : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
+    // Mark original object as finished. In absence of R-value references we need to const_cast:
+    const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
+    const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
+    const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;
+  }
+
  /* inserts a scalar value in the target matrix */
  CommaInitializer& operator,(const Scalar& s)
  {
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -348,7 +348,7 @@ template<typename Derived> class DenseBase
    bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
    
    inline bool hasNaN() const;
-    inline bool isFinite() const;
+    inline bool allFinite() const;

    inline Derived& operator*=(const Scalar& other);
    inline Derived& operator/=(const Scalar& other);
@@ -462,8 +462,10 @@ template<typename Derived> class DenseBase
    template<int p> RealScalar lpNorm() const;

    template<int RowFactor, int ColFactor>
-    const Replicate<Derived,RowFactor,ColFactor> replicate() const;
-    const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;
+    inline const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+    
+    typedef Replicate<Derived,Dynamic,Dynamic> ReplicateReturnType;
+    inline const ReplicateReturnType replicate(Index rowFacor,Index colFactor) const;

    typedef Reverse<Derived, BothDirections> ReverseReturnType;
    typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -24,6 +24,14 @@ namespace internal {

 struct constructor_without_unaligned_array_assert {};

+template<typename T, int Size> void check_static_allocation_size()
+{
+  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
+  #if EIGEN_STACK_ALLOCATION_LIMIT
+  EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+  #endif
+}
+
 /** \internal
  * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
  * to 16 bytes boundary if the total size is a multiple of 16 bytes.
@@ -38,12 +46,12 @@ struct plain_array

  plain_array() 
  { 
-    EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+    check_static_allocation_size<T,Size>();
  }

  plain_array(constructor_without_unaligned_array_assert) 
  { 
-    EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+    check_static_allocation_size<T,Size>();
  }
 };

@@ -76,12 +84,12 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 16>
  plain_array() 
  { 
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf);
-    EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+    check_static_allocation_size<T,Size>();
  }

  plain_array(constructor_without_unaligned_array_assert) 
  { 
-    EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+    check_static_allocation_size<T,Size>();
  }
 };

--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -190,18 +190,18 @@ MatrixBase<Derived>::diagonal() const
  *
  * \sa MatrixBase::diagonal(), class Diagonal */
 template<typename Derived>
-inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<DynamicIndex>::Type
+inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
 MatrixBase<Derived>::diagonal(Index index)
 {
-  return typename DiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+  return DiagonalDynamicIndexReturnType(derived(), index);
 }

 /** This is the const version of diagonal(Index). */
 template<typename Derived>
-inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<DynamicIndex>::Type
+inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
 MatrixBase<Derived>::diagonal(Index index) const
 {
-  return typename ConstDiagonalIndexReturnType<DynamicIndex>::Type(derived(), index);
+  return ConstDiagonalDynamicIndexReturnType(derived(), index);
 }

 /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -126,36 +126,6 @@ Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
  return derived();
 }

-/** replaces \c *this by \c *this * \a other.
-  *
-  * \returns a reference to \c *this
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline Derived&
-MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheRight(derived());
-  return derived();
-}
-
-/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheRight(derived());
-}
-
-/** replaces \c *this by \c *this * \a other. */
-template<typename Derived>
-template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheLeft(derived());
-}
-
 } // end namespace Eigen

 #endif // EIGEN_EIGENBASE_H
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -589,7 +589,7 @@ struct linspaced_op_impl<Scalar,true>

  template<typename Index>
  EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
-  { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(i),m_interPacket))); }
+  { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }

  const Scalar m_low;
  const Scalar m_step;
@@ -609,7 +609,7 @@ template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_o
 template <typename Scalar, bool RandomAccess> struct linspaced_op
 {
  typedef typename packet_traits<Scalar>::type Packet;
-  linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
+  linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}

  template<typename Index>
  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -232,7 +232,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest&
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
  const Index cols = dest.cols();
  for (Index j=0; j<cols; ++j)
-    func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
+    func(dest.col(j), prod.rhs().coeff(0,j) * prod.lhs());
 }

 // Row major
@@ -243,7 +243,7 @@ EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest&
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
  const Index rows = dest.rows();
  for (Index i=0; i<rows; ++i)
-    func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
+    func(dest.row(i), prod.lhs().coeff(i,0) * prod.rhs());
 }

 template<typename Lhs, typename Rhs>
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -185,21 +185,22 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
    explicit_precision = fmt.precision;
  }

+  std::streamsize old_precision = 0;
+  if(explicit_precision) old_precision = s.precision(explicit_precision);
+
  bool align_cols = !(fmt.flags & DontAlignCols);
  if(align_cols)
  {
    // compute the largest width
-    for(Index j = 1; j < m.cols(); ++j)
+    for(Index j = 0; j < m.cols(); ++j)
      for(Index i = 0; i < m.rows(); ++i)
      {
        std::stringstream sstr;
-        if(explicit_precision) sstr.precision(explicit_precision);
+        sstr.copyfmt(s);
        sstr << m.coeff(i,j);
        width = std::max<Index>(width, Index(sstr.str().length()));
      }
  }
-  std::streamsize old_precision = 0;
-  if(explicit_precision) old_precision = s.precision(explicit_precision);
  s << fmt.matPrefix;
  for(Index i = 0; i < m.rows(); ++i)
  {
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -168,6 +168,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
 template<typename Derived> class MapBase<Derived, WriteAccessors>
  : public MapBase<Derived, ReadOnlyAccessors>
 {
+    typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
  public:

    typedef MapBase<Derived, ReadOnlyAccessors> Base;
@@ -230,13 +231,17 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>

    Derived& operator=(const MapBase& other)
    {
-      Base::Base::operator=(other);
+      ReadOnlyMapBase::Base::operator=(other);
      return derived();
    }

-    using Base::Base::operator=;
+    // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
+    // see bugs 821 and 920.
+    using ReadOnlyMapBase::Base::operator=;
 };

+#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
+
 } // end namespace Eigen

 #endif // EIGEN_MAPBASE_H
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -304,7 +304,7 @@ class Matrix
      : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
    {
      Base::_check_template_params();
-      Base::resize(other.rows(), other.cols());
+      Base::_resize_to_match(other);
      // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to
      //              go for pure _set() implementations, right?
      *this = other;
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -215,7 +215,7 @@ template<typename Derived> class MatrixBase

    typedef Diagonal<Derived> DiagonalReturnType;
    DiagonalReturnType diagonal();
-	typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
+    typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
    ConstDiagonalReturnType diagonal() const;

    template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
@@ -223,16 +223,12 @@ template<typename Derived> class MatrixBase

    template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
    template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+    
+    typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
+    typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;

-    // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
-    // On the other hand they confuse MSVC8...
-    #if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later
-    typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
-    typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
-    #else
-    typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
-    typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
-    #endif
+    DiagonalDynamicIndexReturnType diagonal(Index index);
+    ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;

    #ifdef EIGEN2_SUPPORT
    template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
@@ -510,6 +506,51 @@ template<typename Derived> class MatrixBase
    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
 };

+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** replaces \c *this by \c *this * \a other.
+  *
+  * \returns a reference to \c *this
+  *
+  * Example: \include MatrixBase_applyOnTheRight.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheRight.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived&
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+  return derived();
+}
+
+/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
+  *
+  * Example: \include MatrixBase_applyOnTheRight.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheRight.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+}
+
+/** replaces \c *this by \a other * \c *this.
+  *
+  * Example: \include MatrixBase_applyOnTheLeft.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheLeft.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheLeft(derived());
+}
+
 } // end namespace Eigen

 #endif // EIGEN_MATRIXBASE_H
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -553,8 +553,12 @@ struct permut_matrix_product_retval
    template<typename Dest> inline void evalTo(Dest& dst) const
    {
      const Index n = Side==OnTheLeft ? rows() : cols();
-
-      if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
+      // FIXME we need an is_same for expression that is not sensitive to constness. For instance
+      // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
+      if(    is_same<MatrixTypeNestedCleaned,Dest>::value
+          && blas_traits<MatrixTypeNestedCleaned>::HasUsableDirectAccess
+          && blas_traits<Dest>::HasUsableDirectAccess
+          && extract_data(dst) == extract_data(m_matrix))
      {
        // apply the permutation inplace
        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -47,7 +47,10 @@ template<> struct check_rows_cols_for_overflow<Dynamic> {
  }
 };

-template <typename Derived, typename OtherDerived = Derived, bool IsVector = bool(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;
+template <typename Derived,
+          typename OtherDerived = Derived,
+          bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
+struct conservative_resize_like_impl;

 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;

@@ -668,8 +671,10 @@ private:
    enum { ThisConstantIsPrivateInPlainObjectBase };
 };

+namespace internal {
+
 template <typename Derived, typename OtherDerived, bool IsVector>
-struct internal::conservative_resize_like_impl
+struct conservative_resize_like_impl
 {
  typedef typename Derived::Index Index;
  static void run(DenseBase<Derived>& _this, Index rows, Index cols)
@@ -729,11 +734,14 @@ struct internal::conservative_resize_like_impl
  }
 };

-namespace internal {
-
+// Here, the specialization for vectors inherits from the general matrix case
+// to allow calling .conservativeResize(rows,cols) on vectors.
 template <typename Derived, typename OtherDerived>
 struct conservative_resize_like_impl<Derived,OtherDerived,true>
+  : conservative_resize_like_impl<Derived,OtherDerived,false>
 {
+  using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
+  
  typedef typename Derived::Index Index;
  static void run(DenseBase<Derived>& _this, Index size)
  {
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -1,107 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_PRODUCT_H
-#define EIGEN_PRODUCT_H
-
-namespace Eigen {
-
-template<typename Lhs, typename Rhs> class Product;
-template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
-
-/** \class Product
-  * \ingroup Core_Module
-  *
-  * \brief Expression of the product of two arbitrary matrices or vectors
-  *
-  * \param Lhs the type of the left-hand side expression
-  * \param Rhs the type of the right-hand side expression
-  *
-  * This class represents an expression of the product of two arbitrary matrices.
-  *
-  */
-
-// Use ProductReturnType to get correct traits, in particular vectorization flags
-namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<Product<Lhs, Rhs> >
-  : traits<typename ProductReturnType<Lhs, Rhs>::Type>
-{ 
-  // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
-  // TODO: This flag should eventually go in a separate evaluator traits class
-  enum {
-    Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~EvalBeforeNestingBit
-  };
-};
-} // end namespace internal
-
-
-template<typename Lhs, typename Rhs>
-class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
-                                                                            typename internal::traits<Rhs>::StorageKind>::ret>
-{
-  public:
-    
-    typedef typename ProductImpl<
-        Lhs, Rhs,
-        typename internal::promote_storage_type<typename Lhs::StorageKind,
-                                                typename Rhs::StorageKind>::ret>::Base Base;
-    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
-
-    typedef typename Lhs::Nested LhsNested;
-    typedef typename Rhs::Nested RhsNested;
-    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
-    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
-
-    Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
-    {
-      eigen_assert(lhs.cols() == rhs.rows()
-        && "invalid matrix product"
-        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
-    }
-
-    inline Index rows() const { return m_lhs.rows(); }
-    inline Index cols() const { return m_rhs.cols(); }
-
-    const LhsNestedCleaned& lhs() const { return m_lhs; }
-    const RhsNestedCleaned& rhs() const { return m_rhs; }
-
-  protected:
-
-    LhsNested m_lhs;
-    RhsNested m_rhs;
-};
-
-template<typename Lhs, typename Rhs>
-class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
-{
-    typedef Product<Lhs, Rhs> Derived;
-  public:
-
-    typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-};
-
-/***************************************************************************
-* Implementation of matrix base methods
-***************************************************************************/
-
-
-/** \internal used to test the evaluator only
-  */
-template<typename Lhs,typename Rhs>
-const Product<Lhs,Rhs>
-prod(const Lhs& lhs, const Rhs& rhs)
-{
-  return Product<Lhs,Rhs>(lhs,rhs);
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -85,7 +85,14 @@ class ProductBase : public MatrixBase<Derived>

  public:

+#ifndef EIGEN_NO_MALLOC
+    typedef typename Base::PlainObject BasePlainObject;
+    typedef Matrix<Scalar,RowsAtCompileTime==1?1:Dynamic,ColsAtCompileTime==1?1:Dynamic,BasePlainObject::Options> DynPlainObject;
+    typedef typename internal::conditional<(BasePlainObject::SizeAtCompileTime==Dynamic) || (BasePlainObject::SizeAtCompileTime*int(sizeof(Scalar)) < int(EIGEN_STACK_ALLOCATION_LIMIT)),
+                                           BasePlainObject, DynPlainObject>::type PlainObject;
+#else
    typedef typename Base::PlainObject PlainObject;
+#endif

    ProductBase(const Lhs& a_lhs, const Rhs& a_rhs)
      : m_lhs(a_lhs), m_rhs(a_rhs)
@@ -180,7 +187,12 @@ namespace internal {
 template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
 struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
 {
-  typedef PlainObject const& type;
+  typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
+};
+template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
+struct nested<const GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
+{
+  typedef typename GeneralProduct<Lhs,Rhs,Mode>::PlainObject const& type;
 };
 }

--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -1,411 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#ifndef EIGEN_PRODUCTEVALUATORS_H
-#define EIGEN_PRODUCTEVALUATORS_H
-
-namespace Eigen {
-  
-namespace internal {
-  
-// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
-// traverse the matrix coefficient by coefficient, like CoeffBasedProduct.  Use the existing logic
-// in ProductReturnType to decide.
-
-template<typename XprType, typename ProductType>
-struct product_evaluator_dispatcher;
-
-template<typename Lhs, typename Rhs>
-struct evaluator_impl<Product<Lhs, Rhs> >
-  : product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> 
-{
-  typedef Product<Lhs, Rhs> XprType;
-  typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
-
-  evaluator_impl(const XprType& xpr) : Base(xpr) 
-  { }
-};
-
-template<typename XprType, typename ProductType>
-struct product_evaluator_traits_dispatcher;
-
-template<typename Lhs, typename Rhs>
-struct evaluator_traits<Product<Lhs, Rhs> >
-  : product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> 
-{ 
-  static const int AssumeAliasing = 1;
-};
-
-// Case 1: Evaluate all at once
-//
-// We can view the GeneralProduct class as a part of the product evaluator. 
-// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
-// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
-
-template<typename Lhs, typename Rhs>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > 
-{
-  static const int HasEvalTo = 0;
-};
-
-template<typename Lhs, typename Rhs>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > 
-  : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
-{
-  typedef Product<Lhs, Rhs> XprType;
-  typedef typename XprType::PlainObject PlainObject;
-  typedef typename evaluator<PlainObject>::type evaluator_base;
-
-  // TODO: Computation is too early (?)
-  product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
-  {
-    m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
-  }
-  
-protected:  
-  PlainObject m_result;
-};
-
-// For the other three subcases, simply call the evalTo() method of GeneralProduct
-// TODO: GeneralProduct should take evaluators, not expression objects.
-
-template<typename Lhs, typename Rhs, int ProductType>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > 
-{
-  static const int HasEvalTo = 1;
-};
-
-template<typename Lhs, typename Rhs, int ProductType>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > 
-{
-  typedef Product<Lhs, Rhs> XprType;
-  typedef typename XprType::PlainObject PlainObject;
-  typedef typename evaluator<PlainObject>::type evaluator_base;
-  
-  product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr)
-  { }
-  
-  template<typename DstEvaluatorType, typename DstXprType>
-  void evalTo(DstEvaluatorType /* not used */, DstXprType& dst)
-  {
-    dst.resize(m_xpr.rows(), m_xpr.cols());
-    GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);
-  }
-  
-protected: 
-  const XprType& m_xpr;
-};
-
-// Case 2: Evaluate coeff by coeff
-//
-// This is mostly taken from CoeffBasedProduct.h
-// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
-// for the inner dimension of the product, because evaluator object do not know their size.
-
-template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl;
-
-template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl;
-
-template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
-{
-  static const int HasEvalTo = 0;
-};
-
-template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
-  : evaluator_impl_base<Product<Lhs, Rhs> >
-{
-  typedef Product<Lhs, Rhs> XprType;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
-
-  product_evaluator_dispatcher(const XprType& xpr) 
-    : m_lhsImpl(xpr.lhs()), 
-      m_rhsImpl(xpr.rhs()),  
-      m_innerDim(xpr.lhs().cols())
-  { }
-
-  typedef typename XprType::Index Index;
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketScalar PacketScalar;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-
-  // Everything below here is taken from CoeffBasedProduct.h
-
-  enum {
-    RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
-    PacketSize = packet_traits<Scalar>::size,
-    InnerSize  = traits<CoeffBasedProductType>::InnerSize,
-    CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
-    Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
-    CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
-  };
-
-  typedef typename evaluator<Lhs>::type LhsEtorType;
-  typedef typename evaluator<Rhs>::type RhsEtorType;
-  typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
-                                  Unroll ? InnerSize-1 : Dynamic,
-                                  LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
-
-  const CoeffReturnType coeff(Index row, Index col) const
-  {
-    Scalar res;
-    CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
-    return res;
-  }
-
-  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
-   * which is why we don't set the LinearAccessBit.
-   */
-  const CoeffReturnType coeff(Index index) const
-  {
-    Scalar res;
-    const Index row = RowsAtCompileTime == 1 ? 0 : index;
-    const Index col = RowsAtCompileTime == 1 ? index : 0;
-    CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
-    return res;
-  }
-
-  template<int LoadMode>
-  const PacketReturnType packet(Index row, Index col) const
-  {
-    PacketScalar res;
-    typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
-				     Unroll ? InnerSize-1 : Dynamic,
-				     LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
-    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
-    return res;
-  }
-
-protected:
-  typename evaluator<Lhs>::type m_lhsImpl;
-  typename evaluator<Rhs>::type m_rhsImpl;
-
-  // TODO: Get rid of m_innerDim if known at compile time
-  Index m_innerDim;
-};
-
-/***************************************************************************
-* Normal product .coeff() implementation (with meta-unrolling)
-***************************************************************************/
-
-/**************************************
-*** Scalar path  - no vectorization ***
-**************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
-  {
-    etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
-    res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
-  {
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
-  {
-    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
-    for(Index i = 1; i < innerDim; ++i)
-      res += lhs.coeff(row, i) * rhs.coeff(i, col);
-  }
-};
-
-/*******************************************
-*** Scalar path with inner vectorization ***
-*******************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
-struct etor_product_coeff_vectorized_unroller
-{
-  typedef typename Lhs::Index Index;
-  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
-  {
-    etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
-    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet>
-struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
-  {
-    pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
-  }
-};
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::PacketScalar Packet;
-  typedef typename Lhs::Index Index;
-  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
-  {
-    Packet pres;
-    etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
-    etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
-    res = predux(pres);
-  }
-};
-
-template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
-struct etor_product_coeff_vectorized_dyn_selector
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
-  {
-    res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
-  }
-};
-
-// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
-// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
-template<typename Lhs, typename Rhs, int RhsCols>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
-  {
-    res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
-  }
-};
-
-template<typename Lhs, typename Rhs, int LhsRows>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
-  {
-    res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
-  }
-};
-
-template<typename Lhs, typename Rhs>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
-{
-  typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
-  {
-    res = lhs.transpose().cwiseProduct(rhs).sum();
-  }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
-  {
-    etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
-  }
-};
-
-/*******************
-*** Packet path  ***
-*******************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
-  {
-    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
-    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
-  }
-};
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
-  {
-    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
-    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
-  {
-    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
-  {
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
-  {
-    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
-    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
-    for(Index i = 1; i < innerDim; ++i)
-      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
-  {
-    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
-    for(Index i = 1; i < innerDim; ++i)
-      res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
-  }
-};
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_PRODUCT_EVALUATORS_H
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -94,13 +94,14 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
  typedef _PlainObjectType PlainObjectType;
  typedef _StrideType StrideType;
  enum {
-    Options = _Options
+    Options = _Options,
+    Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
  };

  template<typename Derived> struct match {
    enum {
      HasDirectAccess = internal::has_direct_access<Derived>::ret,
-      StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
+      StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
      InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)
                      || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)
                      || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
@@ -111,7 +112,7 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
    };
    typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
  };
-
+  
 };

 template<typename Derived>
@@ -171,8 +172,12 @@ protected:
    }
    else
      ::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
-    ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
-                                 StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());    
+    
+    if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit)))
+      ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1);
+    else
+      ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
+                                   StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());    
  }

  StrideBase m_stride;
@@ -183,6 +188,8 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
  : public RefBase<Ref<PlainObjectType, Options, StrideType> >
 {
    typedef internal::traits<Ref> Traits;
+    template<typename Derived>
+    inline Ref(const PlainObjectBase<Derived>& expr);
  public:

    typedef RefBase<Ref> Base;
@@ -191,20 +198,21 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename Derived>
-    inline Ref(PlainObjectBase<Derived>& expr,
-               typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+    inline Ref(PlainObjectBase<Derived>& expr)
    {
-      Base::construct(expr);
+      EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+      Base::construct(expr.derived());
    }
    template<typename Derived>
-    inline Ref(const DenseBase<Derived>& expr,
-               typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0,
-               int = Derived::ThisConstantIsPrivateInPlainObjectBase)
+    inline Ref(const DenseBase<Derived>& expr)
    #else
    template<typename Derived>
    inline Ref(DenseBase<Derived>& expr)
    #endif
    {
+      EIGEN_STATIC_ASSERT(static_cast<bool>(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+      EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+      enum { THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY = Derived::ThisConstantIsPrivateInPlainObjectBase};
      Base::construct(expr.const_cast_derived());
    }

--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -135,7 +135,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
  */
 template<typename Derived>
 template<int RowFactor, int ColFactor>
-inline const Replicate<Derived,RowFactor,ColFactor>
+const Replicate<Derived,RowFactor,ColFactor>
 DenseBase<Derived>::replicate() const
 {
  return Replicate<Derived,RowFactor,ColFactor>(derived());
@@ -150,7 +150,7 @@ DenseBase<Derived>::replicate() const
  * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
  */
 template<typename Derived>
-inline const Replicate<Derived,Dynamic,Dynamic>
+const typename DenseBase<Derived>::ReplicateReturnType
 DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
 {
  return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -17,16 +17,29 @@ namespace internal {
 template<typename ExpressionType, typename Scalar>
 inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
 {
-  Scalar max = bl.cwiseAbs().maxCoeff();
-  if (max>scale)
+  using std::max;
+  Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
+  
+  if (maxCoeff>scale)
  {
-    ssq = ssq * numext::abs2(scale/max);
-    scale = max;
-    invScale = Scalar(1)/scale;
+    ssq = ssq * numext::abs2(scale/maxCoeff);
+    Scalar tmp = Scalar(1)/maxCoeff;
+    if(tmp > NumTraits<Scalar>::highest())
+    {
+      invScale = NumTraits<Scalar>::highest();
+      scale = Scalar(1)/invScale;
+    }
+    else
+    {
+      scale = maxCoeff;
+      invScale = tmp;
+    }
  }
-  // TODO if the max is much much smaller than the current scale,
+  
+  // TODO if the maxCoeff is much much smaller than the current scale,
  // then we can neglect this sub vector
-  ssq += (bl*invScale).squaredNorm();
+  if(scale>Scalar(0)) // if scale==0, then bl is 0 
+    ssq += (bl*invScale).squaredNorm();
 }

 template<typename Derived>
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -284,7 +284,8 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
  * Notice however that this method is only useful if you want to replace a matrix by its own transpose.
  * If you just need the transpose of a matrix, use transpose().
  *
-  * \note if the matrix is not square, then \c *this must be a resizable matrix.
+  * \note if the matrix is not square, then \c *this must be a resizable matrix. 
+  * This excludes (non-square) fixed-size matrices, block-expressions and maps.
  *
  * \sa transpose(), adjoint(), adjointInPlace() */
 template<typename Derived>
@@ -315,6 +316,7 @@ inline void DenseBase<Derived>::transposeInPlace()
  * If you just need the adjoint of a matrix, use adjoint().
  *
  * \note if the matrix is not square, then \c *this must be a resizable matrix.
+  * This excludes (non-square) fixed-size matrices, block-expressions and maps.
  *
  * \sa transpose(), adjoint(), transposeInPlace() */
 template<typename Derived>
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -278,21 +278,21 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView

    /** Efficient triangular matrix times vector/matrix product */
    template<typename OtherDerived>
-    TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
+    TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
    operator*(const MatrixBase<OtherDerived>& rhs) const
    {
      return TriangularProduct
-              <Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
+              <Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
              (m_matrix, rhs.derived());
    }

    /** Efficient vector/matrix times triangular matrix product */
    template<typename OtherDerived> friend
-    TriangularProduct<Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false>
+    TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
    operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
    {
      return TriangularProduct
-              <Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false>
+              <Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
              (lhs.derived(),rhs.m_matrix);
    }

@@ -380,19 +380,19 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
    {
      setZero();
-      return assignProduct(other,1);
+      return assignProduct(other.derived(),1);
    }
    
    template<typename ProductDerived, typename Lhs, typename Rhs>
    EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
    {
-      return assignProduct(other,1);
+      return assignProduct(other.derived(),1);
    }
    
    template<typename ProductDerived, typename Lhs, typename Rhs>
    EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
    {
-      return assignProduct(other,-1);
+      return assignProduct(other.derived(),-1);
    }
    
    
@@ -400,25 +400,34 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
    {
      setZero();
-      return assignProduct(other,other.alpha());
+      return assignProduct(other.derived(),other.alpha());
    }
    
    template<typename ProductDerived>
    EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
    {
-      return assignProduct(other,other.alpha());
+      return assignProduct(other.derived(),other.alpha());
    }
    
    template<typename ProductDerived>
    EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
    {
-      return assignProduct(other,-other.alpha());
+      return assignProduct(other.derived(),-other.alpha());
    }
    
  protected:
    
    template<typename ProductDerived, typename Lhs, typename Rhs>
    EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
+    
+    template<int Mode, bool LhsIsTriangular,
+         typename Lhs, bool LhsIsVector,
+         typename Rhs, bool RhsIsVector>
+    EIGEN_STRONG_INLINE TriangularView& assignProduct(const TriangularProduct<Mode, LhsIsTriangular, Lhs, LhsIsVector, Rhs, RhsIsVector>& prod, const Scalar& alpha)
+    {
+      lazyAssign(alpha*prod.eval());
+      return *this;
+    }

    MatrixTypeNested m_matrix;
 };
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -50,7 +50,7 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
    MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
    Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
    Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
-    TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime
+    TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime :  MatrixType::ColsAtCompileTime
  };
  #if EIGEN_GNUC_AT_LEAST(3,4)
  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
@@ -58,7 +58,8 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
  typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
  #endif
  enum {
-    CoeffReadCost = TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
+    CoeffReadCost = TraversalSize==Dynamic ? Dynamic
+                  : TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
  };
 };
 }
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -110,7 +110,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }

-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { __pld((float *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { EIGEN_ARM_PREFETCH((float *)addr); }

 template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
 {
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -48,9 +48,18 @@ typedef uint32x4_t  Packet4ui;
  #define EIGEN_INIT_NEON_PACKET2(X, Y)       {X, Y}
  #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
 #endif
-    
-#ifndef __pld
-#define __pld(x) asm volatile ( "   pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
+
+// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
+// which available on LLVM and GCC (at least)
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+  #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#elif defined __pld
+  #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
+#elif !defined(__aarch64__)
+  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( "   pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#else
+  // by default no explicit prefetching
+  #define EIGEN_ARM_PREFETCH(ADDR)
 #endif

 template<> struct packet_traits<float>  : default_packet_traits
@@ -209,8 +218,8 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& f
 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*      to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }

-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*     addr) { __pld(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*     addr) { EIGEN_ARM_PREFETCH(addr); }

 // FIXME only store the 2 first elements ?
 template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -52,7 +52,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)

  Packet4i emm0;

-  Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
+  Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN
  Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());

  x = pmax(x, p4f_min_norm_pos);  /* cut off denormalized stuff */
@@ -166,7 +166,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
  emm0 = _mm_cvttps_epi32(fx);
  emm0 = _mm_add_epi32(emm0, p4i_0x7f);
  emm0 = _mm_slli_epi32(emm0, 23);
-  return pmul(y, _mm_castsi128_ps(emm0));
+  return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
 }
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet2d pexp<Packet2d>(const Packet2d& _x)
@@ -239,7 +239,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
  emm0 = _mm_add_epi32(emm0, p4i_1023_0);
  emm0 = _mm_slli_epi32(emm0, 20);
  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
-  return pmul(x, _mm_castsi128_pd(emm0));
+  return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
 }

 /* evaluation of 4 sines at onces, using SSE2 intrinsics.
@@ -442,8 +442,11 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
  return _mm_xor_ps(y, sign_bit);
 }

+#if EIGEN_FAST_MATH
+
 // This is based on Quake3's fast inverse square root.
 // For detail see here: http://www.beyond3d.com/content/articles/8/
+// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet4f psqrt<Packet4f>(const Packet4f& _x)
 {
@@ -457,6 +460,14 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
  return pmul(_x,x);
 }

+#else
+
+template<> EIGEN_STRONG_INLINE Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+
 } // end namespace internal

 } // end namespace Eigen
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -83,7 +83,8 @@ template<> struct packet_traits<double> : default_packet_traits
    size=2,

    HasDiv  = 1,
-    HasExp  = 1
+    HasExp  = 1,
+    HasSqrt = 1
  };
 };
 template<> struct packet_traits<int>    : default_packet_traits
@@ -507,8 +508,8 @@ template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
  // for GCC (eg., it does not like using std::min after the pstore !!)
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
-  register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
-  register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
  return aux0<aux2 ? aux0 : aux2;
 }

@@ -528,8 +529,8 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
  // for GCC (eg., it does not like using std::min after the pstore !!)
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
-  register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
-  register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
  return aux0>aux2 ? aux0 : aux2;
 }

--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -90,6 +90,7 @@ struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
            | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),

      CoeffReadCost = InnerSize == Dynamic ? Dynamic
+                    : InnerSize == 0 ? 0
                    : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                      + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

@@ -133,7 +134,7 @@ class CoeffBasedProduct
    };

    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
-                                   Unroll ? InnerSize-1 : Dynamic,
+                                   Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
                                   _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;

    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
@@ -184,7 +185,7 @@ class CoeffBasedProduct
    {
      PacketScalar res;
      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
-                              Unroll ? InnerSize-1 : Dynamic,
+                              Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
                              _LhsNested, _RhsNested, PacketScalar, LoadMode>
        ::run(row, col, m_lhs, m_rhs, res);
      return res;
@@ -262,10 +263,7 @@ struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
  {
-    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
-      for(Index i = 1; i < lhs.cols(); ++i)
-        res += lhs.coeff(row, i) * rhs.coeff(i, col);
+    res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
  }
 };

--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1128,6 +1128,8 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder,
  enum { PacketSize = packet_traits<Scalar>::size };

  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+  EIGEN_UNUSED_VARIABLE(stride)
+  EIGEN_UNUSED_VARIABLE(offset)
  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
  eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
@@ -1215,6 +1217,8 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, Pan
  ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
 {
  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+  EIGEN_UNUSED_VARIABLE(stride)
+  EIGEN_UNUSED_VARIABLE(offset)
  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
  Index packet_cols = (cols/nr) * nr;
@@ -1266,6 +1270,8 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, Pan
  ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
 {
  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+  EIGEN_UNUSED_VARIABLE(stride)
+  EIGEN_UNUSED_VARIABLE(offset)
  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
  Index packet_cols = (cols/nr) * nr;
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -52,11 +52,7 @@ EIGEN_DONT_INLINE static void run(
  Index rows, Index cols,
  const LhsScalar* lhs, Index lhsStride,
  const RhsScalar* rhs, Index rhsIncr,
-  ResScalar* res, Index
-  #ifdef EIGEN_INTERNAL_DEBUGGING
-    resIncr
-  #endif
-  , RhsScalar alpha);
+  ResScalar* res, Index resIncr, RhsScalar alpha);
 };

 template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
@@ -64,12 +60,9 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
  Index rows, Index cols,
  const LhsScalar* lhs, Index lhsStride,
  const RhsScalar* rhs, Index rhsIncr,
-  ResScalar* res, Index
-  #ifdef EIGEN_INTERNAL_DEBUGGING
-    resIncr
-  #endif
-  , RhsScalar alpha)
+  ResScalar* res, Index resIncr, RhsScalar alpha)
 {
+  EIGEN_UNUSED_VARIABLE(resIncr)
  eigen_internal_assert(resIncr==1);
  #ifdef _EIGEN_ACCUMULATE_PACKETS
  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
@@ -265,7 +258,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,Co
        // process aligned result's coeffs
        if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
          for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
-            pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
+            pstore(&res[i], pcj.pmadd(pload<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
        else
          for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
            pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -79,8 +79,8 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
  for (Index j=FirstTriangular ? bound : 0;
       j<(FirstTriangular ? size : bound);j+=2)
  {
-    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
-    register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
+    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+    const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;

    Scalar t0 = cjAlpha * rhs[j];
    Packet ptmp0 = pset1<Packet>(t0);
@@ -147,7 +147,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
  }
  for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
  {
-    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;

    Scalar t1 = cjAlpha * rhs[j];
    Scalar t2(0);
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -54,8 +54,25 @@
 #endif

 #if defined EIGEN_USE_MKL
+#   include <mkl.h> 
+/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
+#   ifndef INTEL_MKL_VERSION
+#       undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */
+#   elif INTEL_MKL_VERSION < 100305    /* the intel-mkl-103-release-notes say this was when the lapacke.h interface was added*/
+#       undef EIGEN_USE_MKL
+#   endif
+#   ifndef EIGEN_USE_MKL
+    /*If the MKL version is too old, undef everything*/
+#       undef   EIGEN_USE_MKL_ALL
+#       undef   EIGEN_USE_BLAS
+#       undef   EIGEN_USE_LAPACKE
+#       undef   EIGEN_USE_MKL_VML
+#       undef   EIGEN_USE_LAPACKE_STRICT
+#       undef   EIGEN_USE_LAPACKE
+#   endif
+#endif

-#include <mkl.h>
+#if defined EIGEN_USE_MKL
 #include <mkl_lapacke.h>
 #define EIGEN_MKL_VML_THRESHOLD 128

--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -12,8 +12,8 @@
 #define EIGEN_MACROS_H

 #define EIGEN_WORLD_VERSION 3
-#define EIGEN_MAJOR_VERSION 1
-#define EIGEN_MINOR_VERSION 91
+#define EIGEN_MAJOR_VERSION 2
+#define EIGEN_MINOR_VERSION 4

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -96,6 +96,13 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
 #endif

+// Cross compiler wrapper around LLVM's __has_builtin
+#ifdef __has_builtin
+#  define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define EIGEN_HAS_BUILTIN(x) 0
+#endif
+
 /** Allows to disable some optimizations which might affect the accuracy of the result.
  * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
  * They currently include:
@@ -238,11 +245,16 @@
 #endif

 // Suppresses 'unused variable' warnings.
-#define EIGEN_UNUSED_VARIABLE(var) (void)var;
+namespace Eigen {
+  namespace internal {
+    template<typename T> void ignore_unused_variable(const T&) {}
+  }
+}
+#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);

 #if !defined(EIGEN_ASM_COMMENT)
  #if (defined __GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
-    #define EIGEN_ASM_COMMENT(X)  asm("#" X)
+    #define EIGEN_ASM_COMMENT(X)  __asm__("#" X)
  #else
    #define EIGEN_ASM_COMMENT(X)
  #endif
@@ -284,7 +296,8 @@
 #endif

 #ifndef EIGEN_STACK_ALLOCATION_LIMIT
-#define EIGEN_STACK_ALLOCATION_LIMIT 20000
+// 131072 == 128 KB
+#define EIGEN_STACK_ALLOCATION_LIMIT 131072
 #endif

 #ifndef EIGEN_DEFAULT_IO_FORMAT
@@ -300,7 +313,7 @@
 // just an empty macro !
 #define EIGEN_EMPTY

-#if defined(_MSC_VER) && (!defined(__INTEL_COMPILER))
+#if defined(_MSC_VER) && (_MSC_VER < 1900) && (!defined(__INTEL_COMPILER))
 #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
  using Base::operator =;
 #elif defined(__clang__) // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -63,7 +63,7 @@
 // Currently, let's include it only on unix systems:
 #if defined(__unix__) || defined(__unix)
  #include <unistd.h>
-  #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+  #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || (defined __PGI) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
    #define EIGEN_HAS_POSIX_MEMALIGN 1
  #endif
 #endif
@@ -272,12 +272,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
  // The defined(_mm_free) is just here to verify that this MSVC version
  // implements _mm_malloc/_mm_free based on the corresponding _aligned_
  // functions. This may not always be the case and we just try to be safe.
-  #if defined(_MSC_VER) && defined(_mm_free)
+  #if defined(_MSC_VER) && (!defined(_WIN32_WCE)) && defined(_mm_free)
    result = _aligned_realloc(ptr,new_size,16);
  #else
    result = generic_aligned_realloc(ptr,new_size,old_size);
  #endif
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
  result = _aligned_realloc(ptr,new_size,16);
 #else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
@@ -417,6 +417,8 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pt

 template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size)
 {
+  if(size==0)
+    return 0; // short-cut. Also fixes Bug 884
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
@@ -464,9 +466,8 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
 template<typename Scalar, typename Index>
 static inline Index first_aligned(const Scalar* array, Index size)
 {
-  enum { PacketSize = packet_traits<Scalar>::size,
-         PacketAlignedMask = PacketSize-1
-  };
+  static const Index PacketSize = packet_traits<Scalar>::size;
+  static const Index PacketAlignedMask = PacketSize-1;

  if(PacketSize==1)
  {
@@ -578,7 +579,7 @@ template<typename T> class aligned_stack_memory_handler
  */
 #ifdef EIGEN_ALLOCA

-  #ifdef __arm__
+  #if defined(__arm__) || defined(_WIN32)
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16)
  #else
    #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
@@ -612,7 +613,6 @@ template<typename T> class aligned_stack_memory_handler
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        try { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        catch (...) { return 0; } \
-        return 0; \
      }
  #else
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
@@ -634,7 +634,9 @@ template<typename T> class aligned_stack_memory_handler
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
+      static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
      void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
+      void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void operator delete(void *ptr, const std::nothrow_t&) throw() { \
@@ -729,15 +731,6 @@ public:
        ::new( p ) T( value );
    }

-    // Support for c++11
-#if (__cplusplus >= 201103L)
-    template<typename... Args>
-    void  construct(pointer p, Args&&... args)
-    {
-      ::new(p) T(std::forward<Args>(args)...);
-    }
-#endif
-
    void destroy( pointer p )
    {
        p->~T();
@@ -784,9 +777,9 @@ namespace internal {

 #ifdef EIGEN_CPUID

-inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
+inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
 {
-  return abcd[1]==(reinterpret_cast<const int*>(vendor))[0] && abcd[3]==(reinterpret_cast<const int*>(vendor))[1] && abcd[2]==(reinterpret_cast<const int*>(vendor))[2];
+  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
 }

 inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
@@ -928,13 +921,16 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3)
 {
  #ifdef EIGEN_CPUID
  int abcd[4];
+  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
+  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
+  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[1];
-  if(cpuid_is_vendor(abcd,"GenuineIntel"))
+  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
-  else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
+  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -90,7 +90,9 @@
        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
-        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG
+        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
+        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
+        STORAGE_LAYOUT_DOES_NOT_MATCH
      };
    };

--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -341,7 +341,7 @@ template<typename T, int n=1, typename PlainObject = typename eval<T>::type> str
 };

 template<typename T>
-T* const_cast_ptr(const T* ptr)
+inline T* const_cast_ptr(const T* ptr)
 {
  return const_cast<T*>(ptr);
 }
--- a/Eigen/src/Eigen2Support/LeastSquares.h
+++ b/Eigen/src/Eigen2Support/LeastSquares.h
@@ -147,7 +147,6 @@ void fitHyperplane(int numPoints,

  // compute the covariance matrix
  CovMatrixType covMat = CovMatrixType::Zero(size, size);
-  VectorType remean = VectorType::Zero(size);
  for(int i = 0; i < numPoints; ++i)
  {
    VectorType diff = (*(points[i]) - mean).conjugate();
--- a/Eigen/src/Eigen2Support/SVD.h
+++ b/Eigen/src/Eigen2Support/SVD.h
@@ -512,8 +512,7 @@ template<typename MatrixType>
 template<typename OtherDerived, typename ResultType>
 bool SVD<MatrixType>::solve(const MatrixBase<OtherDerived> &b, ResultType* result) const
 {
-  const int rows = m_matU.rows();
-  ei_assert(b.rows() == rows);
+  ei_assert(b.rows() == m_matU.rows());

  Scalar maxVal = m_sigma.cwise().abs().maxCoeff();
  for (int j=0; j<b.cols(); ++j)
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -313,7 +313,7 @@ namespace Eigen {
      using std::abs;
      using std::sqrt;
      const Index dim=m_S.cols();
-      if (abs(m_S.coeff(i+1,i)==Scalar(0)))
+      if (abs(m_S.coeff(i+1,i))==Scalar(0))
        return;
      Index z = findSmallDiagEntry(i,i+1);
      if (z==i-1)
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -563,7 +563,6 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
    if(computeEigenvectors)
    {
      Scalar safeNorm2 = Eigen::NumTraits<Scalar>::epsilon();
-      safeNorm2 *= safeNorm2;
      if((eivals(2)-eivals(0))<=Eigen::NumTraits<Scalar>::epsilon())
      {
        eivecs.setIdentity();
@@ -577,7 +576,7 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
        Scalar d0 = eivals(2) - eivals(1);
        Scalar d1 = eivals(1) - eivals(0);
        int k =  d0 > d1 ? 2 : 0;
-        d0 = d0 > d1 ? d1 : d0;
+        d0 = d0 > d1 ? d0 : d1;

        tmp.diagonal().array () -= eivals(k);
        VectorType cross;
@@ -585,19 +584,25 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
        n = (cross = tmp.row(0).cross(tmp.row(1))).squaredNorm();

        if(n>safeNorm2)
+        {
          eivecs.col(k) = cross / sqrt(n);
+        }
        else
        {
          n = (cross = tmp.row(0).cross(tmp.row(2))).squaredNorm();

          if(n>safeNorm2)
+          {
            eivecs.col(k) = cross / sqrt(n);
+          }
          else
          {
            n = (cross = tmp.row(1).cross(tmp.row(2))).squaredNorm();

            if(n>safeNorm2)
+            {
              eivecs.col(k) = cross / sqrt(n);
+            }
            else
            {
              // the input matrix and/or the eigenvaues probably contains some inf/NaN,
@@ -617,12 +622,16 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
        tmp.diagonal().array() -= eivals(1);

        if(d0<=Eigen::NumTraits<Scalar>::epsilon())
+        {
          eivecs.col(1) = eivecs.col(k).unitOrthogonal();
+        }
        else
        {
-          n = (cross = eivecs.col(k).cross(tmp.row(0).normalized())).squaredNorm();
+          n = (cross = eivecs.col(k).cross(tmp.row(0))).squaredNorm();
          if(n>safeNorm2)
+          {
            eivecs.col(1) = cross / sqrt(n);
+          }
          else
          {
            n = (cross = eivecs.col(k).cross(tmp.row(1))).squaredNorm();
@@ -636,13 +645,14 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
              else
              {
                // we should never reach this point,
-                // if so the last two eigenvalues are likely to ve very closed to each other
+                // if so the last two eigenvalues are likely to be very close to each other
                eivecs.col(1) = eivecs.col(k).unitOrthogonal();
              }
            }
          }

          // make sure that eivecs[1] is orthogonal to eivecs[2]
+          // FIXME: this step should not be needed
          Scalar d = eivecs.col(1).dot(eivecs.col(k));
          eivecs.col(1) = (eivecs.col(1) - d * eivecs.col(k)).normalized();
        }
--- a/Eigen/src/Geometry/EulerAngles.h
+++ b/Eigen/src/Geometry/EulerAngles.h
@@ -28,7 +28,7 @@ namespace Eigen {
  *      * AngleAxisf(ea[2], Vector3f::UnitZ()); \endcode
  * This corresponds to the right-multiply conventions (with right hand side frames).
  * 
-  * The returned angles are in the ranges [0:pi]x[0:pi]x[-pi:pi].
+  * The returned angles are in the ranges [0:pi]x[-pi:pi]x[-pi:pi].
  * 
  * \sa class AngleAxis
  */
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -100,7 +100,17 @@ public:
  {
    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
    Hyperplane result(p0.size());
-    result.normal() = (p2 - p0).cross(p1 - p0).normalized();
+    VectorType v0(p2 - p0), v1(p1 - p0);
+    result.normal() = v0.cross(v1);
+    RealScalar norm = result.normal().norm();
+    if(norm <= v0.norm() * v1.norm() * NumTraits<RealScalar>::epsilon())
+    {
+      Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
+      JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
+      result.normal() = svd.matrixV().col(2);
+    }
+    else
+      result.normal() /= norm;
    result.offset() = -p0.dot(result.normal());
    return result;
  }
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -150,10 +150,6 @@ public:
  /** \returns the conjugated quaternion */
  Quaternion<Scalar> conjugate() const;

-  /** \returns an interpolation for a constant motion between \a other and \c *this
-    * \a t in [0;1]
-    * see http://en.wikipedia.org/wiki/Slerp
-    */
  template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;

  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
@@ -194,11 +190,11 @@ public:
  * \brief The quaternion class used to represent 3D orientations and rotations
  *
  * \tparam _Scalar the scalar type, i.e., the type of the coefficients
-  * \tparam _Options controls the memory alignement of the coeffecients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign.
+  * \tparam _Options controls the memory alignment of the coefficients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign.
  *
  * This class represents a quaternion \f$ w+xi+yj+zk \f$ that is a convenient representation of
  * orientations and rotations of objects in three dimensions. Compared to other representations
-  * like Euler angles or 3x3 matrices, quatertions offer the following advantages:
+  * like Euler angles or 3x3 matrices, quaternions offer the following advantages:
  * \li \b compact storage (4 scalars)
  * \li \b efficient to compose (28 flops),
  * \li \b stable spherical interpolation
@@ -207,6 +203,8 @@ public:
  * \li \c Quaternionf for \c float
  * \li \c Quaterniond for \c double
  *
+  * \warning Operations interpreting the quaternion as rotation have undefined behavior if the quaternion is not normalized.
+  *
  * \sa  class AngleAxis, class Transform
  */

@@ -348,7 +346,7 @@ class Map<const Quaternion<_Scalar>, _Options >

    /** Constructs a Mapped Quaternion object from the pointer \a coeffs
      *
-      * The pointer \a coeffs must reference the four coeffecients of Quaternion in the following order:
+      * The pointer \a coeffs must reference the four coefficients of Quaternion in the following order:
      * \code *coeffs == {x, y, z, w} \endcode
      *
      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
@@ -385,7 +383,7 @@ class Map<Quaternion<_Scalar>, _Options >

    /** Constructs a Mapped Quaternion object from the pointer \a coeffs
      *
-      * The pointer \a coeffs must reference the four coeffecients of Quaternion in the following order:
+      * The pointer \a coeffs must reference the four coefficients of Quaternion in the following order:
      * \code *coeffs == {x, y, z, w} \endcode
      *
      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
@@ -399,16 +397,16 @@ class Map<Quaternion<_Scalar>, _Options >
 };

 /** \ingroup Geometry_Module
-  * Map an unaligned array of single precision scalar as a quaternion */
+  * Map an unaligned array of single precision scalars as a quaternion */
 typedef Map<Quaternion<float>, 0>         QuaternionMapf;
 /** \ingroup Geometry_Module
-  * Map an unaligned array of double precision scalar as a quaternion */
+  * Map an unaligned array of double precision scalars as a quaternion */
 typedef Map<Quaternion<double>, 0>        QuaternionMapd;
 /** \ingroup Geometry_Module
-  * Map a 16-bits aligned array of double precision scalars as a quaternion */
+  * Map a 16-byte aligned array of single precision scalars as a quaternion */
 typedef Map<Quaternion<float>, Aligned>   QuaternionMapAlignedf;
 /** \ingroup Geometry_Module
-  * Map a 16-bits aligned array of double precision scalars as a quaternion */
+  * Map a 16-byte aligned array of double precision scalars as a quaternion */
 typedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;

 /***************************************************************************
@@ -468,7 +466,7 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
    // Note that this algorithm comes from the optimization by hand
    // of the conversion to a Matrix followed by a Matrix/Vector product.
    // It appears to be much faster than the common algorithm found
-    // in the litterature (30 versus 39 flops). It also requires two
+    // in the literature (30 versus 39 flops). It also requires two
    // Vector3 as temporaries.
    Vector3 uv = this->vec().cross(v);
    uv += uv;
@@ -579,7 +577,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
  Scalar c = v1.dot(v0);

  // if dot == -1, vectors are nearly opposites
-  // => accuraletly compute the rotation axis by computing the
+  // => accurately compute the rotation axis by computing the
  //    intersection of the two planes. This is done by solving:
  //       x^T v0 = 0
  //       x^T v1 = 0
@@ -588,7 +586,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
  //    which yields a singular value problem
  if (c < Scalar(-1)+NumTraits<Scalar>::dummy_precision())
  {
-    c = max<Scalar>(c,-1);
+    c = (max)(c,Scalar(-1));
    Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
    JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
    Vector3 axis = svd.matrixV().col(2);
@@ -671,14 +669,19 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
 {
  using std::acos;
  using std::abs;
-  double d = abs(this->dot(other));
-  if (d>=1.0)
+  Scalar d = abs(this->dot(other));
+  if (d>=Scalar(1))
    return Scalar(0);
-  return static_cast<Scalar>(2 * acos(d));
+  return Scalar(2) * acos(d);
 }

+ 
+    
 /** \returns the spherical linear interpolation between the two quaternions
-  * \c *this and \a other at the parameter \a t
+  * \c *this and \a other at the parameter \a t in [0;1].
+  * 
+  * This represents an interpolation for a constant motion between \c *this and \a other,
+  * see also http://en.wikipedia.org/wiki/Slerp.
  */
 template <class Derived>
 template <class OtherDerived>
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -60,6 +60,9 @@ public:

  /** Construct a 2D counter clock wise rotation from the angle \a a in radian. */
  inline Rotation2D(const Scalar& a) : m_angle(a) {}
+  
+  /** Default constructor wihtout initialization. The represented rotation is undefined. */
+  Rotation2D() {}

  /** \returns the rotation angle */
  inline Scalar angle() const { return m_angle; }
@@ -81,10 +84,10 @@ public:
  /** Applies the rotation to a 2D vector */
  Vector2 operator* (const Vector2& vec) const
  { return toRotationMatrix() * vec; }
-
+  
  template<typename Derived>
  Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
-  Matrix2 toRotationMatrix(void) const;
+  Matrix2 toRotationMatrix() const;

  /** \returns the spherical interpolation between \c *this and \a other using
    * parameter \a t. It is in fact equivalent to a linear interpolation.
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -62,6 +62,8 @@ struct transform_construct_from_matrix;

 template<typename TransformType> struct transform_take_affine_part;

+template<int Mode> struct transform_make_affine;
+
 } // end namespace internal

 /** \geometry_module \ingroup Geometry_Module
@@ -194,9 +196,9 @@ public:
  /** type of the matrix used to represent the linear part of the transformation */
  typedef Matrix<Scalar,Dim,Dim,Options> LinearMatrixType;
  /** type of read/write reference to the linear part of the transformation */
-  typedef Block<MatrixType,Dim,Dim,int(Mode)==(AffineCompact)> LinearPart;
+  typedef Block<MatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (Options&RowMajor)==0> LinearPart;
  /** type of read reference to the linear part of the transformation */
-  typedef const Block<ConstMatrixType,Dim,Dim,int(Mode)==(AffineCompact)> ConstLinearPart;
+  typedef const Block<ConstMatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (Options&RowMajor)==0> ConstLinearPart;
  /** type of read/write reference to the affine part of the transformation */
  typedef typename internal::conditional<int(Mode)==int(AffineCompact),
                              MatrixType&,
@@ -230,8 +232,7 @@ public:
  inline Transform()
  {
    check_template_params();
-    if (int(Mode)==Affine)
-      makeAffine();
+    internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
  }

  inline Transform(const Transform& other)
@@ -530,9 +531,9 @@ public:

  inline Transform& operator=(const UniformScaling<Scalar>& t);
  inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
-  inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Isometry)> operator*(const UniformScaling<Scalar>& s) const
+  inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?int(Affine):int(Mode))> operator*(const UniformScaling<Scalar>& s) const
  {
-    Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Isometry),Options> res = *this;
+    Transform<Scalar,Dim,(int(Mode)==int(Isometry)?int(Affine):int(Mode)),Options> res = *this;
    res.scale(s.factor());
    return res;
  }
@@ -591,11 +592,7 @@ public:
    */
  void makeAffine()
  {
-    if(int(Mode)!=int(AffineCompact))
-    {
-      matrix().template block<1,Dim>(Dim,0).setZero();
-      matrix().coeffRef(Dim,Dim) = Scalar(1);
-    }
+    internal::transform_make_affine<int(Mode)>::run(m_matrix);
  }

  /** \internal
@@ -1079,6 +1076,24 @@ Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBas

 namespace internal {

+template<int Mode>
+struct transform_make_affine
+{
+  template<typename MatrixType>
+  static void run(MatrixType &mat)
+  {
+    static const int Dim = MatrixType::ColsAtCompileTime-1;
+    mat.template block<1,Dim>(Dim,0).setZero();
+    mat.coeffRef(Dim,Dim) = typename MatrixType::Scalar(1);
+  }
+};
+
+template<>
+struct transform_make_affine<AffineCompact>
+{
+  template<typename MatrixType> static void run(MatrixType &) { }
+};
+    
 // selector needed to avoid taking the inverse of a 3x4 matrix
 template<typename TransformType, int Mode=TransformType::Mode>
 struct projective_transform_inverse
--- a/Eigen/src/Geometry/Umeyama.h
+++ b/Eigen/src/Geometry/Umeyama.h
@@ -113,7 +113,7 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
  const Index n = src.cols(); // number of measurements

  // required for demeaning ...
-  const RealScalar one_over_n = 1 / static_cast<RealScalar>(n);
+  const RealScalar one_over_n = RealScalar(1) / static_cast<RealScalar>(n);

  // computation of mean
  const VectorType src_mean = src.rowwise().sum() * one_over_n;
@@ -136,16 +136,16 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo

  // Eq. (39)
  VectorType S = VectorType::Ones(m);
-  if (sigma.determinant()<0) S(m-1) = -1;
+  if (sigma.determinant()<Scalar(0)) S(m-1) = Scalar(-1);

  // Eq. (40) and (43)
  const VectorType& d = svd.singularValues();
  Index rank = 0; for (Index i=0; i<m; ++i) if (!internal::isMuchSmallerThan(d.coeff(i),d.coeff(0))) ++rank;
  if (rank == m-1) {
-    if ( svd.matrixU().determinant() * svd.matrixV().determinant() > 0 ) {
+    if ( svd.matrixU().determinant() * svd.matrixV().determinant() > Scalar(0) ) {
      Rt.block(0,0,m,m).noalias() = svd.matrixU()*svd.matrixV().transpose();
    } else {
-      const Scalar s = S(m-1); S(m-1) = -1;
+      const Scalar s = S(m-1); S(m-1) = Scalar(-1);
      Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose();
      S(m-1) = s;
    }
@@ -156,7 +156,7 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
  if (with_scaling)
  {
    // Eq. (42)
-    const Scalar c = 1/src_var * svd.singularValues().dot(S);
+    const Scalar c = Scalar(1)/src_var * svd.singularValues().dot(S);

    // Eq. (41)
    Rt.col(m).head(m) = dst_mean;
--- a/Eigen/src/Householder/BlockHouseholder.h
+++ b/Eigen/src/Householder/BlockHouseholder.h
@@ -48,7 +48,7 @@ void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vec
  typedef typename MatrixType::Index Index;
  enum { TFactorSize = MatrixType::ColsAtCompileTime };
  Index nbVecs = vectors.cols();
-  Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize> T(nbVecs,nbVecs);
+  Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, ColMajor> T(nbVecs,nbVecs);
  make_block_householder_triangular_factor(T, vectors, hCoeffs);

  const TriangularView<const VectorsType, UnitLower>& V(vectors);
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -39,7 +39,6 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,
  int maxIters = iters;

  int n = mat.cols();
-  x = precond.solve(x);
  VectorType r  = rhs - mat * x;
  VectorType r0 = r;
  
@@ -61,6 +60,7 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,
  VectorType s(n), t(n);

  RealScalar tol2 = tol*tol;
+  RealScalar eps2 = NumTraits<Scalar>::epsilon()*NumTraits<Scalar>::epsilon();
  int i = 0;
  int restarts = 0;

@@ -69,7 +69,7 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,
    Scalar rho_old = rho;

    rho = r0.dot(r);
-    if (internal::isMuchSmallerThan(rho,r0_sqnorm))
+    if (abs(rho) < eps2*r0_sqnorm)
    {
      // The new residual vector became too orthogonal to the arbitrarily choosen direction r0
      // Let's restart with a new r0:
@@ -142,7 +142,7 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
  * SparseMatrix<double> A(n,n);
  * // fill A and b
  * BiCGSTAB<SparseMatrix<double> > solver;
-  * solver(A);
+  * solver.compute(A);
  * x = solver.solve(b);
  * std::cout << "#iterations:     " << solver.iterations() << std::endl;
  * std::cout << "estimated error: " << solver.error()      << std::endl;
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -20,10 +20,11 @@ namespace Eigen {
  *
  * \param MatrixType the type of the matrix of which we are computing the LU decomposition
  *
-  * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A
-  * is decomposed as A = PLUQ where L is unit-lower-triangular, U is upper-triangular, and P and Q
-  * are permutation matrices. This is a rank-revealing LU decomposition. The eigenvalues (diagonal
-  * coefficients) of U are sorted in such a way that any zeros are at the end.
+  * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
+  * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
+  * upper-triangular, and P and Q are permutation matrices. This is a rank-revealing LU
+  * decomposition. The eigenvalues (diagonal coefficients) of U are sorted in such a way that any
+  * zeros are at the end.
  *
  * This decomposition provides the generic approach to solving systems of linear equations, computing
  * the rank, invertibility, inverse, kernel, and determinant.
@@ -511,8 +512,8 @@ typename internal::traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant
 }

 /** \returns the matrix represented by the decomposition,
- * i.e., it returns the product: P^{-1} L U Q^{-1}.
- * This function is provided for debug purpose. */
+ * i.e., it returns the product: \f$ P^{-1} L U Q^{-1} \f$.
+ * This function is provided for debug purposes. */
 template<typename MatrixType>
 MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
 {
--- a/Eigen/src/OrderingMethods/Ordering.h
+++ b/Eigen/src/OrderingMethods/Ordering.h
@@ -109,7 +109,7 @@ class NaturalOrdering
  * \class COLAMDOrdering
  *
  * Functor computing the \em column \em approximate \em minimum \em degree ordering 
-  * The matrix should be in column-major format
+  * The matrix should be in column-major and \b compressed format (see SparseMatrix::makeCompressed()).
  */
 template<typename Index>
 class COLAMDOrdering
@@ -118,10 +118,14 @@ class COLAMDOrdering
    typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; 
    typedef Matrix<Index, Dynamic, 1> IndexVector;
    
-    /** Compute the permutation vector form a sparse matrix */
+    /** Compute the permutation vector \a perm form the sparse matrix \a mat
+      * \warning The input sparse matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()).
+      */
    template <typename MatrixType>
    void operator() (const MatrixType& mat, PermutationType& perm)
    {
+      eigen_assert(mat.isCompressed() && "COLAMDOrdering requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to COLAMDOrdering");
+      
      Index m = mat.rows();
      Index n = mat.cols();
      Index nnz = mat.nonZeros();
@@ -132,12 +136,12 @@ class COLAMDOrdering
      Index stats [COLAMD_STATS];
      internal::colamd_set_defaults(knobs);
      
-      Index info;
      IndexVector p(n+1), A(Alen); 
      for(Index i=0; i <= n; i++)   p(i) = mat.outerIndexPtr()[i];
      for(Index i=0; i < nnz; i++)  A(i) = mat.innerIndexPtr()[i];
      // Call Colamd routine to compute the ordering 
-      info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); 
+      Index info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); 
+      EIGEN_UNUSED_VARIABLE(info);
      eigen_assert( info && "COLAMD failed " );
      
      perm.resize(n);
--- a/Eigen/src/PardisoSupport/PardisoSupport.h
+++ b/Eigen/src/PardisoSupport/PardisoSupport.h
@@ -219,7 +219,7 @@ class PardisoImpl
    void pardisoInit(int type)
    {
      m_type = type;
-      bool symmetric = abs(m_type) < 10;
+      bool symmetric = std::abs(m_type) < 10;
      m_iparm[0] = 1;   // No solver default
      m_iparm[1] = 3;   // use Metis for the ordering
      m_iparm[2] = 1;   // Numbers of processors, value of OMP_NUM_THREADS
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -76,7 +76,8 @@ template<typename _MatrixType> class ColPivHouseholderQR
        m_colsTranspositions(),
        m_temp(),
        m_colSqNorms(),
-        m_isInitialized(false) {}
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false) {}

    /** \brief Default Constructor with memory preallocation
      *
@@ -349,7 +350,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
      return m_usePrescribedThreshold ? m_prescribedThreshold
      // this formula comes from experimenting (see "LU precision tuning" thread on the list)
      // and turns out to be identical to Higham's formula used already in LDLt.
-                                      : NumTraits<Scalar>::epsilon() * m_qr.diagonalSize();
+                                      : NumTraits<Scalar>::epsilon() * RealScalar(m_qr.diagonalSize());
    }

    /** \returns the number of nonzero pivots in the QR decomposition.
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -63,9 +63,10 @@ template<typename _MatrixType> class FullPivHouseholderQR
    typedef typename MatrixType::Index Index;
    typedef internal::FullPivHouseholderQRMatrixQReturnType<MatrixType> MatrixQReturnType;
    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
-    typedef Matrix<Index, 1, ColsAtCompileTime, RowMajor, 1, MaxColsAtCompileTime> IntRowVectorType;
+    typedef Matrix<Index, 1,
+                   EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime,RowsAtCompileTime), RowMajor, 1,
+                   EIGEN_SIZE_MIN_PREFER_FIXED(MaxColsAtCompileTime,MaxRowsAtCompileTime)> IntDiagSizeVectorType;
    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
-    typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType;
    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
    typedef typename internal::plain_col_type<MatrixType>::type ColVectorType;

@@ -93,10 +94,10 @@ template<typename _MatrixType> class FullPivHouseholderQR
    FullPivHouseholderQR(Index rows, Index cols)
      : m_qr(rows, cols),
        m_hCoeffs((std::min)(rows,cols)),
-        m_rows_transpositions(rows),
-        m_cols_transpositions(cols),
+        m_rows_transpositions((std::min)(rows,cols)),
+        m_cols_transpositions((std::min)(rows,cols)),
        m_cols_permutation(cols),
-        m_temp((std::min)(rows,cols)),
+        m_temp(cols),
        m_isInitialized(false),
        m_usePrescribedThreshold(false) {}

@@ -115,10 +116,10 @@ template<typename _MatrixType> class FullPivHouseholderQR
    FullPivHouseholderQR(const MatrixType& matrix)
      : m_qr(matrix.rows(), matrix.cols()),
        m_hCoeffs((std::min)(matrix.rows(), matrix.cols())),
-        m_rows_transpositions(matrix.rows()),
-        m_cols_transpositions(matrix.cols()),
+        m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())),
+        m_cols_transpositions((std::min)(matrix.rows(), matrix.cols())),
        m_cols_permutation(matrix.cols()),
-        m_temp((std::min)(matrix.rows(), matrix.cols())),
+        m_temp(matrix.cols()),
        m_isInitialized(false),
        m_usePrescribedThreshold(false)
    {
@@ -126,11 +127,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
    }

    /** This method finds a solution x to the equation Ax=b, where A is the matrix of which
-      * *this is the QR decomposition, if any exists.
+      * \c *this is the QR decomposition.
      *
      * \param b the right-hand-side of the equation to solve.
      *
-      * \returns a solution.
+      * \returns the exact or least-square solution if the rank is greater or equal to the number of columns of A,
+      * and an arbitrary solution otherwise.
      *
      * \note The case where b is a matrix is not yet implemented. Also, this
      *       code is space inefficient.
@@ -172,7 +174,7 @@ template<typename _MatrixType> class FullPivHouseholderQR
    }

    /** \returns a const reference to the vector of indices representing the rows transpositions */
-    const IntColVectorType& rowsTranspositions() const
+    const IntDiagSizeVectorType& rowsTranspositions() const
    {
      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
      return m_rows_transpositions;
@@ -344,7 +346,7 @@ template<typename _MatrixType> class FullPivHouseholderQR
      return m_usePrescribedThreshold ? m_prescribedThreshold
      // this formula comes from experimenting (see "LU precision tuning" thread on the list)
      // and turns out to be identical to Higham's formula used already in LDLt.
-                                      : NumTraits<Scalar>::epsilon() * m_qr.diagonalSize();
+                                      : NumTraits<Scalar>::epsilon() * RealScalar(m_qr.diagonalSize());
    }

    /** \returns the number of nonzero pivots in the QR decomposition.
@@ -368,8 +370,8 @@ template<typename _MatrixType> class FullPivHouseholderQR
  protected:
    MatrixType m_qr;
    HCoeffsType m_hCoeffs;
-    IntColVectorType m_rows_transpositions;
-    IntRowVectorType m_cols_transpositions;
+    IntDiagSizeVectorType m_rows_transpositions;
+    IntDiagSizeVectorType m_cols_transpositions;
    PermutationType m_cols_permutation;
    RowVectorType m_temp;
    bool m_isInitialized, m_usePrescribedThreshold;
@@ -415,10 +417,10 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons

  m_temp.resize(cols);

-  m_precision = NumTraits<Scalar>::epsilon() * size;
+  m_precision = NumTraits<Scalar>::epsilon() * RealScalar(size);

-  m_rows_transpositions.resize(matrix.rows());
-  m_cols_transpositions.resize(matrix.cols());
+  m_rows_transpositions.resize(size);
+  m_cols_transpositions.resize(size);
  Index number_of_transpositions = 0;

  RealScalar biggest(0);
@@ -516,17 +518,6 @@ struct solve_retval<FullPivHouseholderQR<_MatrixType>, Rhs>
                                  dec().hCoeffs().coeff(k), &temp.coeffRef(0));
    }

-    if(!dec().isSurjective())
-    {
-      // is c is in the image of R ?
-      RealScalar biggest_in_upper_part_of_c = c.topRows(   dec().rank()     ).cwiseAbs().maxCoeff();
-      RealScalar biggest_in_lower_part_of_c = c.bottomRows(rows-dec().rank()).cwiseAbs().maxCoeff();
-      // FIXME brain dead
-      const RealScalar m_precision = NumTraits<Scalar>::epsilon() * (std::min)(rows,cols);
-      // this internal:: prefix is needed by at least gcc 3.4 and ICC
-      if(!internal::isMuchSmallerThan(biggest_in_lower_part_of_c, biggest_in_upper_part_of_c, m_precision))
-        return;
-    }
    dec().matrixQR()
       .topLeftCorner(dec().rank(), dec().rank())
       .template triangularView<Upper>()
@@ -548,14 +539,14 @@ template<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType
 {
 public:
  typedef typename MatrixType::Index Index;
-  typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType;
+  typedef typename FullPivHouseholderQR<MatrixType>::IntDiagSizeVectorType IntDiagSizeVectorType;
  typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
  typedef Matrix<typename MatrixType::Scalar, 1, MatrixType::RowsAtCompileTime, RowMajor, 1,
                 MatrixType::MaxRowsAtCompileTime> WorkVectorType;

  FullPivHouseholderQRMatrixQReturnType(const MatrixType&       qr,
                                        const HCoeffsType&      hCoeffs,
-                                        const IntColVectorType& rowsTranspositions)
+                                        const IntDiagSizeVectorType& rowsTranspositions)
    : m_qr(qr),
      m_hCoeffs(hCoeffs),
      m_rowsTranspositions(rowsTranspositions)
@@ -595,7 +586,7 @@ public:
 protected:
  typename MatrixType::Nested m_qr;
  typename HCoeffsType::Nested m_hCoeffs;
-  typename IntColVectorType::Nested m_rowsTranspositions;
+  typename IntDiagSizeVectorType::Nested m_rowsTranspositions;
 };

 } // end namespace internal
--- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
+++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
@@ -64,7 +64,8 @@ class SPQR
    typedef PermutationMatrix<Dynamic, Dynamic> PermutationType;
  public:
    SPQR() 
-    : m_ordering(SPQR_ORDERING_DEFAULT),
+      : m_isInitialized(false),
+      m_ordering(SPQR_ORDERING_DEFAULT),
      m_allow_tol(SPQR_DEFAULT_TOL),
      m_tolerance (NumTraits<Scalar>::epsilon())
    { 
@@ -72,7 +73,8 @@ class SPQR
    }
    
    SPQR(const _MatrixType& matrix) 
-    : m_ordering(SPQR_ORDERING_DEFAULT),
+    : m_isInitialized(false),
+      m_ordering(SPQR_ORDERING_DEFAULT),
      m_allow_tol(SPQR_DEFAULT_TOL),
      m_tolerance (NumTraits<Scalar>::epsilon())
    {
@@ -82,16 +84,22 @@ class SPQR
    
    ~SPQR()
    {
-      // Calls SuiteSparseQR_free()
+      SPQR_free();
+      cholmod_l_finish(&m_cc);
+    }
+    void SPQR_free()
+    {
      cholmod_l_free_sparse(&m_H, &m_cc);
      cholmod_l_free_sparse(&m_cR, &m_cc);
      cholmod_l_free_dense(&m_HTau, &m_cc);
      std::free(m_E);
      std::free(m_HPinv);
-      cholmod_l_finish(&m_cc);
    }
+
    void compute(const _MatrixType& matrix)
    {
+      if(m_isInitialized) SPQR_free();
+
      MatrixType mat(matrix);
      cholmod_sparse A; 
      A = viewAsCholmod(mat);
@@ -139,7 +147,7 @@ class SPQR
      eigen_assert(b.cols()==1 && "This method is for vectors only");
      
      //Compute Q^T * b
-      Dest y; 
+      typename Dest::PlainObject y;
      y = matrixQ().transpose() * b;
        // Solves with the triangular matrix R
      Index rk = this->rank();
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -375,14 +375,19 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
    Scalar z;
    JacobiRotation<Scalar> rot;
    RealScalar n = sqrt(numext::abs2(work_matrix.coeff(p,p)) + numext::abs2(work_matrix.coeff(q,p)));
+    
    if(n==0)
    {
      z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
      work_matrix.row(p) *= z;
      if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);
-      z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
-      work_matrix.row(q) *= z;
-      if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
+      if(work_matrix.coeff(q,q)!=Scalar(0))
+      {
+        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
+        work_matrix.row(q) *= z;
+        if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
+      }
+      // otherwise the second row is already zero, so we have nothing to do.
    }
    else
    {
@@ -412,6 +417,7 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
                            JacobiRotation<RealScalar> *j_right)
 {
  using std::sqrt;
+  using std::abs;
  Matrix<RealScalar,2,2> m;
  m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
       numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));
@@ -425,9 +431,11 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
  }
  else
  {
-    RealScalar u = d / t;
-    rot1.c() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u));
-    rot1.s() = rot1.c() * u;
+    RealScalar t2d2 = numext::hypot(t,d);
+    rot1.c() = abs(t)/t2d2;
+    rot1.s() = d/t2d2;
+    if(t<RealScalar(0))
+      rot1.s() = -rot1.s();
  }
  m.applyOnTheLeft(0,1,rot1);
  j_right->makeJacobi(m,0,1);
@@ -528,8 +536,9 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
    JacobiSVD()
      : m_isInitialized(false),
        m_isAllocated(false),
+        m_usePrescribedThreshold(false),
        m_computationOptions(0),
-        m_rows(-1), m_cols(-1)
+        m_rows(-1), m_cols(-1), m_diagSize(0)
    {}


@@ -542,6 +551,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
    JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
      : m_isInitialized(false),
        m_isAllocated(false),
+        m_usePrescribedThreshold(false),
        m_computationOptions(0),
        m_rows(-1), m_cols(-1)
    {
@@ -561,6 +571,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
    JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
      : m_isInitialized(false),
        m_isAllocated(false),
+        m_usePrescribedThreshold(false),
        m_computationOptions(0),
        m_rows(-1), m_cols(-1)
    {
@@ -662,6 +673,69 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
      return m_nonzeroSingularValues;
    }
+    
+    /** \returns the rank of the matrix of which \c *this is the SVD.
+      *
+      * \note This method has to determine which singular values should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index rank() const
+    {
+      using std::abs;
+      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
+      if(m_singularValues.size()==0) return 0;
+      RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold();
+      Index i = m_nonzeroSingularValues-1;
+      while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
+      return i+1;
+    }
+    
+    /** Allows to prescribe a threshold to be used by certain methods, such as rank() and solve(),
+      * which need to determine when singular values are to be considered nonzero.
+      * This is not used for the SVD decomposition itself.
+      *
+      * When it needs to get the threshold value, Eigen calls threshold().
+      * The default is \c NumTraits<Scalar>::epsilon()
+      *
+      * \param threshold The new value to use as the threshold.
+      *
+      * A singular value will be considered nonzero if its value is strictly greater than
+      *  \f$ \vert singular value \vert \leqslant threshold \times \vert max singular value \vert \f$.
+      *
+      * If you want to come back to the default behavior, call setThreshold(Default_t)
+      */
+    JacobiSVD& setThreshold(const RealScalar& threshold)
+    {
+      m_usePrescribedThreshold = true;
+      m_prescribedThreshold = threshold;
+      return *this;
+    }
+
+    /** Allows to come back to the default behavior, letting Eigen use its default formula for
+      * determining the threshold.
+      *
+      * You should pass the special object Eigen::Default as parameter here.
+      * \code svd.setThreshold(Eigen::Default); \endcode
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    JacobiSVD& setThreshold(Default_t)
+    {
+      m_usePrescribedThreshold = false;
+      return *this;
+    }
+
+    /** Returns the threshold that will be used by certain methods such as rank().
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    RealScalar threshold() const
+    {
+      eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+      return m_usePrescribedThreshold ? m_prescribedThreshold
+                                      : (std::max<Index>)(1,m_diagSize)*NumTraits<Scalar>::epsilon();
+    }

    inline Index rows() const { return m_rows; }
    inline Index cols() const { return m_cols; }
@@ -674,11 +748,12 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
    MatrixVType m_matrixV;
    SingularValuesType m_singularValues;
    WorkMatrixType m_workMatrix;
-    bool m_isInitialized, m_isAllocated;
+    bool m_isInitialized, m_isAllocated, m_usePrescribedThreshold;
    bool m_computeFullU, m_computeThinU;
    bool m_computeFullV, m_computeThinV;
    unsigned int m_computationOptions;
    Index m_nonzeroSingularValues, m_rows, m_cols, m_diagSize;
+    RealScalar m_prescribedThreshold;

    template<typename __MatrixType, int _QRPreconditioner, bool _IsComplex>
    friend struct internal::svd_precondition_2x2_block_to_be_real;
@@ -687,6 +762,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD

    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreColsThanRows> m_qr_precond_morecols;
    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols> m_qr_precond_morerows;
+    MatrixType m_scaledMatrix;
 };

 template<typename MatrixType, int QRPreconditioner>
@@ -733,8 +809,9 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, u
                            : 0);
  m_workMatrix.resize(m_diagSize, m_diagSize);
  
-  if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this);
-  if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this);
+  if(m_cols>m_rows)   m_qr_precond_morecols.allocate(*this);
+  if(m_rows>m_cols)   m_qr_precond_morerows.allocate(*this);
+  if(m_cols!=m_cols)  m_scaledMatrix.resize(rows,cols);
 }

 template<typename MatrixType, int QRPreconditioner>
@@ -751,11 +828,21 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
  // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
  const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min();

+  // Scaling factor to reduce over/under-flows
+  RealScalar scale = matrix.cwiseAbs().maxCoeff();
+  if(scale==RealScalar(0)) scale = RealScalar(1);
+  
  /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */

-  if(!m_qr_precond_morecols.run(*this, matrix) && !m_qr_precond_morerows.run(*this, matrix))
+  if(m_rows!=m_cols)
  {
-    m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize);
+    m_scaledMatrix = matrix / scale;
+    m_qr_precond_morecols.run(*this, m_scaledMatrix);
+    m_qr_precond_morerows.run(*this, m_scaledMatrix);
+  }
+  else
+  {
+    m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize) / scale;
    if(m_computeFullU) m_matrixU.setIdentity(m_rows,m_rows);
    if(m_computeThinU) m_matrixU.setIdentity(m_rows,m_diagSize);
    if(m_computeFullV) m_matrixV.setIdentity(m_cols,m_cols);
@@ -781,7 +868,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
        using std::max;
        RealScalar threshold = (max)(considerAsZero, precision * (max)(abs(m_workMatrix.coeff(p,p)),
                                                                       abs(m_workMatrix.coeff(q,q))));
-        if((max)(abs(m_workMatrix.coeff(p,q)),abs(m_workMatrix.coeff(q,p))) > threshold)
+        // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
+        if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
        {
          finished = false;

@@ -830,6 +918,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
      if(computeV()) m_matrixV.col(pos).swap(m_matrixV.col(i));
    }
  }
+  
+  m_singularValues *= scale;

  m_isInitialized = true;
  return *this;
@@ -851,11 +941,11 @@ struct solve_retval<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
    // So A^{-1} = V S^{-1} U^*

    Matrix<Scalar, Dynamic, Rhs::ColsAtCompileTime, 0, _MatrixType::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime> tmp;
-    Index nonzeroSingVals = dec().nonzeroSingularValues();
+    Index rank = dec().rank();
    
-    tmp.noalias() = dec().matrixU().leftCols(nonzeroSingVals).adjoint() * rhs();
-    tmp = dec().singularValues().head(nonzeroSingVals).asDiagonal().inverse() * tmp;
-    dst = dec().matrixV().leftCols(nonzeroSingVals) * tmp;
+    tmp.noalias() = dec().matrixU().leftCols(rank).adjoint() * rhs();
+    tmp = dec().singularValues().head(rank).asDiagonal().inverse() * tmp;
+    dst = dec().matrixV().leftCols(rank) * tmp;
  }
 };
 } // end namespace internal
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -37,6 +37,7 @@ class SimplicialCholeskyBase : internal::noncopyable
 {
  public:
    typedef typename internal::traits<Derived>::MatrixType MatrixType;
+    typedef typename internal::traits<Derived>::OrderingType OrderingType;
    enum { UpLo = internal::traits<Derived>::UpLo };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
@@ -240,15 +241,16 @@ class SimplicialCholeskyBase : internal::noncopyable
    RealScalar m_shiftScale;
 };

-template<typename _MatrixType, int _UpLo = Lower> class SimplicialLLT;
-template<typename _MatrixType, int _UpLo = Lower> class SimplicialLDLT;
-template<typename _MatrixType, int _UpLo = Lower> class SimplicialCholesky;
+template<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::Index> > class SimplicialLLT;
+template<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::Index> > class SimplicialLDLT;
+template<typename _MatrixType, int _UpLo = Lower, typename _Ordering = AMDOrdering<typename _MatrixType::Index> > class SimplicialCholesky;

 namespace internal {

-template<typename _MatrixType, int _UpLo> struct traits<SimplicialLLT<_MatrixType,_UpLo> >
+template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialLLT<_MatrixType,_UpLo,_Ordering> >
 {
  typedef _MatrixType MatrixType;
+  typedef _Ordering OrderingType;
  enum { UpLo = _UpLo };
  typedef typename MatrixType::Scalar                         Scalar;
  typedef typename MatrixType::Index                          Index;
@@ -259,9 +261,10 @@ template<typename _MatrixType, int _UpLo> struct traits<SimplicialLLT<_MatrixTyp
  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
 };

-template<typename _MatrixType,int _UpLo> struct traits<SimplicialLDLT<_MatrixType,_UpLo> >
+template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> >
 {
  typedef _MatrixType MatrixType;
+  typedef _Ordering OrderingType;
  enum { UpLo = _UpLo };
  typedef typename MatrixType::Scalar                             Scalar;
  typedef typename MatrixType::Index                              Index;
@@ -272,9 +275,10 @@ template<typename _MatrixType,int _UpLo> struct traits<SimplicialLDLT<_MatrixTyp
  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
 };

-template<typename _MatrixType, int _UpLo> struct traits<SimplicialCholesky<_MatrixType,_UpLo> >
+template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> >
 {
  typedef _MatrixType MatrixType;
+  typedef _Ordering OrderingType;
  enum { UpLo = _UpLo };
 };

@@ -294,11 +298,12 @@ template<typename _MatrixType, int _UpLo> struct traits<SimplicialCholesky<_Matr
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
+  * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<>
  *
-  * \sa class SimplicialLDLT
+  * \sa class SimplicialLDLT, class AMDOrdering, class NaturalOrdering
  */
-template<typename _MatrixType, int _UpLo>
-    class SimplicialLLT : public SimplicialCholeskyBase<SimplicialLLT<_MatrixType,_UpLo> >
+template<typename _MatrixType, int _UpLo, typename _Ordering>
+    class SimplicialLLT : public SimplicialCholeskyBase<SimplicialLLT<_MatrixType,_UpLo,_Ordering> >
 {
 public:
    typedef _MatrixType MatrixType;
@@ -382,11 +387,12 @@ public:
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
+  * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<>
  *
-  * \sa class SimplicialLLT
+  * \sa class SimplicialLLT, class AMDOrdering, class NaturalOrdering
  */
-template<typename _MatrixType, int _UpLo>
-    class SimplicialLDLT : public SimplicialCholeskyBase<SimplicialLDLT<_MatrixType,_UpLo> >
+template<typename _MatrixType, int _UpLo, typename _Ordering>
+    class SimplicialLDLT : public SimplicialCholeskyBase<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> >
 {
 public:
    typedef _MatrixType MatrixType;
@@ -467,8 +473,8 @@ public:
  *
  * \sa class SimplicialLDLT, class SimplicialLLT
  */
-template<typename _MatrixType, int _UpLo>
-    class SimplicialCholesky : public SimplicialCholeskyBase<SimplicialCholesky<_MatrixType,_UpLo> >
+template<typename _MatrixType, int _UpLo, typename _Ordering>
+    class SimplicialCholesky : public SimplicialCholeskyBase<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> >
 {
 public:
    typedef _MatrixType MatrixType;
@@ -612,15 +618,13 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy
 {
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
-  // TODO allows to configure the permutation
  // Note that amd compute the inverse permutation
  {
    CholMatrixType C;
    C = a.template selfadjointView<UpLo>();
-    // remove diagonal entries:
-    // seems not to be needed
-    // C.prune(keep_diag());
-    internal::minimum_degree_ordering(C, m_Pinv);
+    
+    OrderingType ordering;
+    ordering(C,m_Pinv);
  }

  if(m_Pinv.size()>0)
--- a/Eigen/src/SparseCore/AmbiVector.h
+++ b/Eigen/src/SparseCore/AmbiVector.h
@@ -69,7 +69,7 @@ class AmbiVector
      delete[] m_buffer;
      if (size<1000)
      {
-        Index allocSize = (size * sizeof(ListEl))/sizeof(Scalar);
+        Index allocSize = (size * sizeof(ListEl) + sizeof(Scalar) - 1)/sizeof(Scalar);
        m_allocatedElements = (allocSize*sizeof(Scalar))/sizeof(ListEl);
        m_buffer = new Scalar[allocSize];
      }
@@ -88,7 +88,7 @@ class AmbiVector
      Index copyElements = m_allocatedElements;
      m_allocatedElements = (std::min)(Index(m_allocatedElements*1.5),m_size);
      Index allocSize = m_allocatedElements * sizeof(ListEl);
-      allocSize = allocSize/sizeof(Scalar) + (allocSize%sizeof(Scalar)>0?1:0);
+      allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar);
      Scalar* newBuffer = new Scalar[allocSize];
      memcpy(newBuffer,  m_buffer,  copyElements * sizeof(ListEl));
      delete[] m_buffer;
--- a/Eigen/src/SparseCore/CompressedStorage.h
+++ b/Eigen/src/SparseCore/CompressedStorage.h
@@ -51,8 +51,8 @@ class CompressedStorage
    CompressedStorage& operator=(const CompressedStorage& other)
    {
      resize(other.size());
-      memcpy(m_values, other.m_values, m_size * sizeof(Scalar));
-      memcpy(m_indices, other.m_indices, m_size * sizeof(Index));
+      internal::smart_copy(other.m_values,  other.m_values  + m_size, m_values);
+      internal::smart_copy(other.m_indices, other.m_indices + m_size, m_indices);
      return *this;
    }

@@ -83,10 +83,10 @@ class CompressedStorage
        reallocate(m_size);
    }

-    void resize(size_t size, float reserveSizeFactor = 0)
+    void resize(size_t size, double reserveSizeFactor = 0)
    {
      if (m_allocatedSize<size)
-        reallocate(size + size_t(reserveSizeFactor*size));
+        reallocate(size + size_t(reserveSizeFactor*double(size)));
      m_size = size;
    }

--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -66,9 +66,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
    }

    // unordered insertion
-    for(int k=0; k<nnz; ++k)
+    for(Index k=0; k<nnz; ++k)
    {
-      int i = indices[k];
+      Index i = indices[k];
      res.insertBackByOuterInnerUnordered(j,i) = values[i];
      mask[i] = false;
    }
@@ -76,8 +76,8 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
 #if 0
    // alternative ordered insertion code:

-    int t200 = rows/(log2(200)*1.39);
-    int t = (rows*100)/139;
+    Index t200 = rows/(log2(200)*1.39);
+    Index t = (rows*100)/139;

    // FIXME reserve nnz non zeros
    // FIXME implement fast sort algorithms for very small nnz
@@ -90,9 +90,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
    if(true)
    {
      if(nnz>1) std::sort(indices.data(),indices.data()+nnz);
-      for(int k=0; k<nnz; ++k)
+      for(Index k=0; k<nnz; ++k)
      {
-        int i = indices[k];
+        Index i = indices[k];
        res.insertBackByOuterInner(j,i) = values[i];
        mask[i] = false;
      }
@@ -100,7 +100,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
    else
    {
      // dense path
-      for(int i=0; i<rows; ++i)
+      for(Index i=0; i<rows; ++i)
      {
        if(mask[i])
        {
@@ -134,8 +134,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
    ColMajorMatrix resCol(lhs.rows(),rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
    // sort the non zeros:
@@ -149,7 +149,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
 {
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-     typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+     typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
     RowMajorMatrix rhsRow = rhs;
     RowMajorMatrix resRow(lhs.rows(), rhs.cols());
     internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
@@ -162,7 +162,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
 {
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
    RowMajorMatrix lhsRow = lhs;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
@@ -175,7 +175,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
 {
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    res = resRow;
@@ -190,7 +190,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
    res = resCol;
@@ -202,7 +202,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
 {
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
    ColMajorMatrix lhsCol = lhs;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
@@ -215,7 +215,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
 {
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
    ColMajorMatrix rhsCol = rhs;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
@@ -228,8 +228,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
 {
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(),rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    // sort the non zeros:
--- a/Eigen/src/SparseCore/MappedSparseMatrix.h
+++ b/Eigen/src/SparseCore/MappedSparseMatrix.h
@@ -50,6 +50,8 @@ class MappedSparseMatrix
    inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; }
    inline Index innerSize() const { return m_innerSize; }
    inline Index outerSize() const { return m_outerSize; }
+    
+    bool isCompressed() const { return true; }

    //----------------------------------------
    // direct access interface
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -66,6 +66,10 @@ public:
    typename XprType::Nested m_matrix;
    Index m_outerStart;
    const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
+  private:
+    Index nonZeros() const;
 };


@@ -80,6 +84,7 @@ class BlockImpl<SparseMatrix<_Scalar, _Options, _Index>,BlockRows,BlockCols,true
    typedef SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType;
    typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _MatrixTypeNested;
    typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType;
+    typedef Block<const SparseMatrixType, BlockRows, BlockCols, true> ConstBlockType;
 public:
    enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
    EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
@@ -243,6 +248,93 @@ public:

 };

+
+template<typename _Scalar, int _Options, typename _Index, int BlockRows, int BlockCols>
+class BlockImpl<const SparseMatrix<_Scalar, _Options, _Index>,BlockRows,BlockCols,true,Sparse>
+  : public SparseMatrixBase<Block<const SparseMatrix<_Scalar, _Options, _Index>,BlockRows,BlockCols,true> >
+{
+    typedef SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType;
+    typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _MatrixTypeNested;
+    typedef Block<const SparseMatrixType, BlockRows, BlockCols, true> BlockType;
+public:
+    enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
+    EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
+protected:
+    enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
+public:
+    
+    class InnerIterator: public SparseMatrixType::InnerIterator
+    {
+      public:
+        inline InnerIterator(const BlockType& xpr, Index outer)
+          : SparseMatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
+        {}
+        inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
+        inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
+      protected:
+        Index m_outer;
+    };
+    class ReverseInnerIterator: public SparseMatrixType::ReverseInnerIterator
+    {
+      public:
+        inline ReverseInnerIterator(const BlockType& xpr, Index outer)
+          : SparseMatrixType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
+        {}
+        inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
+        inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
+      protected:
+        Index m_outer;
+    };
+
+    inline BlockImpl(const SparseMatrixType& xpr, int i)
+      : m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
+    {}
+
+    inline BlockImpl(const SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols)
+      : m_matrix(xpr), m_outerStart(IsRowMajor ? startRow : startCol), m_outerSize(IsRowMajor ? blockRows : blockCols)
+    {}
+
+    inline const Scalar* valuePtr() const
+    { return m_matrix.valuePtr() + m_matrix.outerIndexPtr()[m_outerStart]; }
+
+    inline const Index* innerIndexPtr() const
+    { return m_matrix.innerIndexPtr() + m_matrix.outerIndexPtr()[m_outerStart]; }
+
+    inline const Index* outerIndexPtr() const
+    { return m_matrix.outerIndexPtr() + m_outerStart; }
+
+    Index nonZeros() const
+    {
+      if(m_matrix.isCompressed())
+        return  std::size_t(m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()])
+              - std::size_t(m_matrix.outerIndexPtr()[m_outerStart]);
+      else if(m_outerSize.value()==0)
+        return 0;
+      else
+        return Map<const Matrix<Index,OuterSize,1> >(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum();
+    }
+
+    const Scalar& lastCoeff() const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(BlockImpl);
+      eigen_assert(nonZeros()>0);
+      if(m_matrix.isCompressed())
+        return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1];
+      else
+        return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart]+m_matrix.innerNonZeroPtr()[m_outerStart]-1];
+    }
+
+    EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
+    EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
+
+  protected:
+
+    typename SparseMatrixType::Nested m_matrix;
+    Index m_outerStart;
+    const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
+
+};
+
 //----------

 /** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
@@ -391,6 +483,8 @@ public:
  protected:
    friend class InnerIterator;
    friend class ReverseInnerIterator;
+    
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)

    typename XprType::Nested m_matrix;
    const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
--- a/Eigen/src/SparseCore/SparseColEtree.h
+++ b/Eigen/src/SparseCore/SparseColEtree.h
@@ -63,6 +63,7 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
  typedef typename MatrixType::Index Index;
  Index nc = mat.cols(); // Number of columns 
  Index m = mat.rows();
+  Index diagSize = (std::min)(nc,m);
  IndexVector root(nc); // root of subtree of etree 
  root.setZero();
  IndexVector pp(nc); // disjoint sets 
@@ -72,7 +73,7 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
  Index row,col; 
  firstRowElt.resize(m);
  firstRowElt.setConstant(nc);
-  firstRowElt.segment(0, nc).setLinSpaced(nc, 0, nc-1);
+  firstRowElt.segment(0, diagSize).setLinSpaced(diagSize, 0, diagSize-1);
  bool found_diag;
  for (col = 0; col < nc; col++)
  {
@@ -91,7 +92,7 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
  Index rset, cset, rroot; 
  for (col = 0; col < nc; col++) 
  {
-    found_diag = false;
+    found_diag = col>=m;
    pp(col) = col; 
    cset = col; 
    root(cset) = col; 
@@ -105,6 +106,7 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
      Index i = col;
      if(it) i = it.index();
      if (i == col) found_diag = true;
+      
      row = firstRowElt(i);
      if (row >= col) continue; 
      rset = internal::etree_find(row, pp); // Find the name of the set containing row
--- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
@@ -73,7 +73,8 @@ class CwiseBinaryOpImpl<BinaryOp,Lhs,Rhs,Sparse>::InnerIterator
    typedef internal::sparse_cwise_binary_op_inner_iterator_selector<
      BinaryOp,Lhs,Rhs, InnerIterator> Base;

-    EIGEN_STRONG_INLINE InnerIterator(const CwiseBinaryOpImpl& binOp, Index outer)
+    // NOTE: we have to prefix Index by "typename Lhs::" to avoid an ICE with VC11
+    EIGEN_STRONG_INLINE InnerIterator(const CwiseBinaryOpImpl& binOp, typename Lhs::Index outer)
      : Base(binOp.derived(),outer)
    {}
 };
--- a/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -19,7 +19,10 @@ template<typename Lhs, typename Rhs, int InnerSize> struct SparseDenseProductRet

 template<typename Lhs, typename Rhs> struct SparseDenseProductReturnType<Lhs,Rhs,1>
 {
-  typedef SparseDenseOuterProduct<Lhs,Rhs,false> Type;
+  typedef typename internal::conditional<
+    Lhs::IsRowMajor,
+    SparseDenseOuterProduct<Rhs,Lhs,true>,
+    SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type;
 };

 template<typename Lhs, typename Rhs, int InnerSize> struct DenseSparseProductReturnType
@@ -29,7 +32,10 @@ template<typename Lhs, typename Rhs, int InnerSize> struct DenseSparseProductRet

 template<typename Lhs, typename Rhs> struct DenseSparseProductReturnType<Lhs,Rhs,1>
 {
-  typedef SparseDenseOuterProduct<Rhs,Lhs,true> Type;
+  typedef typename internal::conditional<
+    Rhs::IsRowMajor,
+    SparseDenseOuterProduct<Rhs,Lhs,true>,
+    SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type;
 };

 namespace internal {
@@ -114,18 +120,31 @@ class SparseDenseOuterProduct<Lhs,Rhs,Transpose>::InnerIterator : public _LhsNes
    typedef typename SparseDenseOuterProduct::Index Index;
  public:
    EIGEN_STRONG_INLINE InnerIterator(const SparseDenseOuterProduct& prod, Index outer)
-      : Base(prod.lhs(), 0), m_outer(outer), m_factor(prod.rhs().coeff(outer))
-    {
-    }
+      : Base(prod.lhs(), 0), m_outer(outer), m_factor(get(prod.rhs(), outer, typename internal::traits<Rhs>::StorageKind() ))
+    { }

    inline Index outer() const { return m_outer; }
-    inline Index row() const { return Transpose ? Base::row() : m_outer; }
-    inline Index col() const { return Transpose ? m_outer : Base::row(); }
+    inline Index row() const { return Transpose ? m_outer : Base::index(); }
+    inline Index col() const { return Transpose ? Base::index() : m_outer; }

    inline Scalar value() const { return Base::value() * m_factor; }

  protected:
-    int m_outer;
+    static Scalar get(const _RhsNested &rhs, Index outer, Dense = Dense())
+    {
+      return rhs.coeff(outer);
+    }
+    
+    static Scalar get(const _RhsNested &rhs, Index outer, Sparse = Sparse())
+    {
+      typename Traits::_RhsNested::InnerIterator it(rhs, outer);
+      if (it && it.index()==0)
+        return it.value();
+      
+      return Scalar(0);
+    }
+    
+    Index m_outer;
    Scalar m_factor;
 };

@@ -155,7 +174,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, R
  {
    for(Index c=0; c<rhs.cols(); ++c)
    {
-      int n = lhs.outerSize();
+      Index n = lhs.outerSize();
      for(Index j=0; j<n; ++j)
      {
        typename Res::Scalar tmp(0);
@@ -287,15 +306,6 @@ class DenseTimeSparseProduct
    DenseTimeSparseProduct& operator=(const DenseTimeSparseProduct&);
 };

-// sparse * dense
-template<typename Derived>
-template<typename OtherDerived>
-inline const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type
-SparseMatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
-{
-  return typename SparseDenseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
-}
-
 } // end namespace Eigen

 #endif // EIGEN_SPARSEDENSEPRODUCT_H
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -223,7 +223,7 @@ class SparseMatrix
      
      if(isCompressed())
      {
-        reserve(VectorXi::Constant(outerSize(), 2));
+        reserve(Matrix<Index,Dynamic,1>::Constant(outerSize(), 2));
      }
      return insertUncompressed(row,col);
    }
@@ -402,7 +402,7 @@ class SparseMatrix
      * \sa insertBack, insertBackByOuterInner */
    inline void startVec(Index outer)
    {
-      eigen_assert(m_outerIndex[outer]==int(m_data.size()) && "You must call startVec for each inner vector sequentially");
+      eigen_assert(m_outerIndex[outer]==Index(m_data.size()) && "You must call startVec for each inner vector sequentially");
      eigen_assert(m_outerIndex[outer+1]==0 && "You must call startVec for each inner vector sequentially");
      m_outerIndex[outer+1] = m_outerIndex[outer];
    }
@@ -480,7 +480,7 @@ class SparseMatrix
      if(m_innerNonZeros != 0)
        return; 
      m_innerNonZeros = static_cast<Index*>(std::malloc(m_outerSize * sizeof(Index)));
-      for (int i = 0; i < m_outerSize; i++)
+      for (Index i = 0; i < m_outerSize; i++)
      {
        m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; 
      }
@@ -752,8 +752,8 @@ class SparseMatrix
        else
          for (Index i=0; i<m.outerSize(); ++i)
          {
-            int p = m.m_outerIndex[i];
-            int pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
+            Index p = m.m_outerIndex[i];
+            Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
            Index k=p;
            for (; k<pe; ++k)
              s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") ";
@@ -939,12 +939,13 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa
  EIGEN_UNUSED_VARIABLE(Options);
  enum { IsRowMajor = SparseMatrixType::IsRowMajor };
  typedef typename SparseMatrixType::Scalar Scalar;
-  SparseMatrix<Scalar,IsRowMajor?ColMajor:RowMajor> trMat(mat.rows(),mat.cols());
+  typedef typename SparseMatrixType::Index Index;
+  SparseMatrix<Scalar,IsRowMajor?ColMajor:RowMajor,Index> trMat(mat.rows(),mat.cols());

-  if(begin<end)
+  if(begin!=end)
  {
    // pass 1: count the nnz per inner-vector
-    VectorXi wi(trMat.outerSize());
+    Matrix<Index,Dynamic,1> wi(trMat.outerSize());
    wi.setZero();
    for(InputIterator it(begin); it!=end; ++it)
    {
@@ -1018,11 +1019,11 @@ void SparseMatrix<Scalar,_Options,_Index>::sumupDuplicates()
 {
  eigen_assert(!isCompressed());
  // TODO, in practice we should be able to use m_innerNonZeros for that task
-  VectorXi wi(innerSize());
+  Matrix<Index,Dynamic,1> wi(innerSize());
  wi.fill(-1);
  Index count = 0;
  // for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers
-  for(int j=0; j<outerSize(); ++j)
+  for(Index j=0; j<outerSize(); ++j)
  {
    Index start   = count;
    Index oldEnd  = m_outerIndex[j]+m_innerNonZeros[j];
@@ -1081,7 +1082,7 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt

    // prefix sum
    Index count = 0;
-    VectorXi positions(dest.outerSize());
+    Matrix<Index,Dynamic,1> positions(dest.outerSize());
    for (Index j=0; j<dest.outerSize(); ++j)
    {
      Index tmp = dest.m_outerIndex[j];
@@ -1177,7 +1178,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse
  size_t p = m_outerIndex[outer+1];
  ++m_outerIndex[outer+1];

-  float reallocRatio = 1;
+  double reallocRatio = 1;
  if (m_data.allocatedSize()<=m_data.size())
  {
    // if there is no preallocated memory, let's reserve a minimum of 32 elements
@@ -1189,13 +1190,13 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse
    {
      // we need to reallocate the data, to reduce multiple reallocations
      // we use a smart resize algorithm based on the current filling ratio
-      // in addition, we use float to avoid integers overflows
-      float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
-      reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
+      // in addition, we use double to avoid integers overflows
+      double nnzEstimate = double(m_outerIndex[outer])*double(m_outerSize)/double(outer+1);
+      reallocRatio = (nnzEstimate-double(m_data.size()))/double(m_data.size());
      // furthermore we bound the realloc ratio to:
      //   1) reduce multiple minor realloc when the matrix is almost filled
      //   2) avoid to allocate too much memory when the matrix is almost empty
-      reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f);
+      reallocRatio = (std::min)((std::max)(reallocRatio,1.5),8.);
    }
  }
  m_data.resize(m_data.size()+1,reallocRatio);
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -105,7 +105,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
                     >::type AdjointReturnType;


-    typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor> PlainObject;
+    typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor, Index> PlainObject;


 #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -302,8 +302,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
        }
        else
        {
-          SparseMatrix<Scalar, RowMajorBit> trans = m;
-          s << static_cast<const SparseMatrixBase<SparseMatrix<Scalar, RowMajorBit> >&>(trans);
+          SparseMatrix<Scalar, RowMajorBit, Index> trans = m;
+          s << static_cast<const SparseMatrixBase<SparseMatrix<Scalar, RowMajorBit, Index> >&>(trans);
        }
      }
      return s;
@@ -358,7 +358,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
    /** sparse * dense (returns a dense object unless it is an outer product) */
    template<typename OtherDerived>
    const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type
-    operator*(const MatrixBase<OtherDerived> &other) const;
+    operator*(const MatrixBase<OtherDerived> &other) const
+    { return typename SparseDenseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); }
    
     /** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */
    SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
--- a/Eigen/src/SparseCore/SparsePermutation.h
+++ b/Eigen/src/SparseCore/SparsePermutation.h
@@ -57,11 +57,11 @@ struct permut_sparsematrix_product_retval
      if(MoveOuter)
      {
        SparseMatrix<Scalar,SrcStorageOrder,Index> tmp(m_matrix.rows(), m_matrix.cols());
-        VectorXi sizes(m_matrix.outerSize());
+        Matrix<Index,Dynamic,1> sizes(m_matrix.outerSize());
        for(Index j=0; j<m_matrix.outerSize(); ++j)
        {
          Index jp = m_permutation.indices().coeff(j);
-          sizes[((Side==OnTheLeft) ^ Transposed) ? jp : j] = m_matrix.innerVector(((Side==OnTheRight) ^ Transposed) ? jp : j).size();
+          sizes[((Side==OnTheLeft) ^ Transposed) ? jp : j] = m_matrix.innerVector(((Side==OnTheRight) ^ Transposed) ? jp : j).nonZeros();
        }
        tmp.reserve(sizes);
        for(Index j=0; j<m_matrix.outerSize(); ++j)
@@ -77,7 +77,7 @@ struct permut_sparsematrix_product_retval
      else
      {
        SparseMatrix<Scalar,int(SrcStorageOrder)==RowMajor?ColMajor:RowMajor,Index> tmp(m_matrix.rows(), m_matrix.cols());
-        VectorXi sizes(tmp.outerSize());
+        Matrix<Index,Dynamic,1> sizes(tmp.outerSize());
        sizes.setZero();
        PermutationMatrix<Dynamic,Dynamic,Index> perm;
        if((Side==OnTheLeft) ^ Transposed)
--- a/Eigen/src/SparseCore/SparseProduct.h
+++ b/Eigen/src/SparseCore/SparseProduct.h
@@ -16,6 +16,7 @@ template<typename Lhs, typename Rhs>
 struct SparseSparseProductReturnType
 {
  typedef typename internal::traits<Lhs>::Scalar Scalar;
+  typedef typename internal::traits<Lhs>::Index Index;
  enum {
    LhsRowMajor = internal::traits<Lhs>::Flags & RowMajorBit,
    RhsRowMajor = internal::traits<Rhs>::Flags & RowMajorBit,
@@ -24,11 +25,11 @@ struct SparseSparseProductReturnType
  };

  typedef typename internal::conditional<TransposeLhs,
-    SparseMatrix<Scalar,0>,
+    SparseMatrix<Scalar,0,Index>,
    typename internal::nested<Lhs,Rhs::RowsAtCompileTime>::type>::type LhsNested;

  typedef typename internal::conditional<TransposeRhs,
-    SparseMatrix<Scalar,0>,
+    SparseMatrix<Scalar,0,Index>,
    typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type>::type RhsNested;

  typedef SparseSparseProduct<LhsNested, RhsNested> Type;
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -75,22 +75,22 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
      * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product.
      */
    template<typename OtherDerived>
-    SparseSparseProduct<SparseMatrix<Scalar,  ((internal::traits<OtherDerived>::Flags&RowMajorBit) ? RowMajor : ColMajor),Index>, OtherDerived>
+    SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived>
    operator*(const SparseMatrixBase<OtherDerived>& rhs) const
    {
-      return SparseSparseProduct<SparseMatrix<Scalar, (internal::traits<OtherDerived>::Flags&RowMajorBit) ? RowMajor : ColMajor, Index>, OtherDerived>(*this, rhs.derived());
+      return SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived>(*this, rhs.derived());
    }
-    
+
    /** \returns an expression of the matrix product between a sparse matrix \a lhs and a sparse self-adjoint matrix \a rhs.
      *
      * Note that there is no algorithmic advantage of performing such a product compared to a general sparse-sparse matrix product.
      * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product.
      */
    template<typename OtherDerived> friend
-    SparseSparseProduct<OtherDerived, SparseMatrix<Scalar,  ((internal::traits<OtherDerived>::Flags&RowMajorBit) ? RowMajor : ColMajor),Index> >
+    SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject >
    operator*(const SparseMatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs)
    {
-      return SparseSparseProduct< OtherDerived, SparseMatrix<Scalar, (internal::traits<OtherDerived>::Flags&RowMajorBit) ? RowMajor : ColMajor, Index> >(lhs.derived(), rhs.derived());
+      return SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject>(lhs.derived(), rhs);
    }
    
    /** Efficient sparse self-adjoint matrix times dense vector/matrix product */
--- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
+++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
@@ -27,7 +27,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
  // make sure to call innerSize/outerSize since we fake the storage order.
  Index rows = lhs.innerSize();
  Index cols = rhs.outerSize();
-  //int size = lhs.outerSize();
+  //Index size = lhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  // allocate a temporary buffer
@@ -100,7 +100,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,C
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
  {
    // we need a col-major matrix to hold the result
-    typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> SparseTemporaryType;
    SparseTemporaryType _res(res.rows(), res.cols());
    internal::sparse_sparse_product_with_pruning_impl<Lhs,Rhs,SparseTemporaryType>(lhs, rhs, _res, tolerance);
    res = _res;
@@ -126,10 +126,11 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
  typedef typename ResultType::RealScalar RealScalar;
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
  {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
-    ColMajorMatrix colLhs(lhs);
-    ColMajorMatrix colRhs(rhs);
-    internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrix,ColMajorMatrix,ResultType>(colLhs, colRhs, res, tolerance);
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixLhs;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixRhs;
+    ColMajorMatrixLhs colLhs(lhs);
+    ColMajorMatrixRhs colRhs(rhs);
+    internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);

    // let's transpose the product to get a column x column product
 //     typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
--- a/Eigen/src/SparseCore/SparseTranspose.h
+++ b/Eigen/src/SparseCore/SparseTranspose.h
@@ -26,7 +26,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>
    inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); }
 };

-// NOTE: VC10 trigger an ICE if don't put typename TransposeImpl<MatrixType,Sparse>:: in front of Index,
+// NOTE: VC10 and VC11 trigger an ICE if don't put typename TransposeImpl<MatrixType,Sparse>:: in front of Index,
 // a typedef typename TransposeImpl<MatrixType,Sparse>::Index Index;
 // does not fix the issue.
 // An alternative is to define the nested class in the parent class itself.
@@ -40,8 +40,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::InnerItera
    EIGEN_STRONG_INLINE InnerIterator(const TransposeImpl& trans, typename TransposeImpl<MatrixType,Sparse>::Index outer)
      : Base(trans.derived().nestedExpression(), outer)
    {}
-    Index row() const { return Base::col(); }
-    Index col() const { return Base::row(); }
+    typename TransposeImpl<MatrixType,Sparse>::Index row() const { return Base::col(); }
+    typename TransposeImpl<MatrixType,Sparse>::Index col() const { return Base::row(); }
 };

 template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::ReverseInnerIterator
@@ -54,8 +54,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::ReverseInn
    EIGEN_STRONG_INLINE ReverseInnerIterator(const TransposeImpl& xpr, typename TransposeImpl<MatrixType,Sparse>::Index outer)
      : Base(xpr.derived().nestedExpression(), outer)
    {}
-    Index row() const { return Base::col(); }
-    Index col() const { return Base::row(); }
+    typename TransposeImpl<MatrixType,Sparse>::Index row() const { return Base::col(); }
+    typename TransposeImpl<MatrixType,Sparse>::Index col() const { return Base::row(); }
 };

 } // end namespace Eigen
--- a/Eigen/src/SparseCore/SparseUtil.h
+++ b/Eigen/src/SparseCore/SparseUtil.h
@@ -84,8 +84,10 @@ template<typename Lhs, typename Rhs>        class DenseTimeSparseProduct;
 template<typename Lhs, typename Rhs, bool Transpose> class SparseDenseOuterProduct;

 template<typename Lhs, typename Rhs> struct SparseSparseProductReturnType;
-template<typename Lhs, typename Rhs, int InnerSize = internal::traits<Lhs>::ColsAtCompileTime> struct DenseSparseProductReturnType;
-template<typename Lhs, typename Rhs, int InnerSize = internal::traits<Lhs>::ColsAtCompileTime> struct SparseDenseProductReturnType;
+template<typename Lhs, typename Rhs,
+         int InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(internal::traits<Lhs>::ColsAtCompileTime,internal::traits<Rhs>::RowsAtCompileTime)> struct DenseSparseProductReturnType;         
+template<typename Lhs, typename Rhs,
+         int InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(internal::traits<Lhs>::ColsAtCompileTime,internal::traits<Rhs>::RowsAtCompileTime)> struct SparseDenseProductReturnType;
 template<typename MatrixType,int UpLo> class SparseSymmetricPermutationProduct;

 namespace internal {
@@ -143,7 +145,7 @@ template<typename T> struct plain_matrix_type<T,Sparse>
  *
  * \sa SparseMatrix::setFromTriplets()
  */
-template<typename Scalar, typename Index=unsigned int>
+template<typename Scalar, typename Index=typename SparseMatrix<Scalar>::Index >
 class Triplet
 {
 public:
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -219,9 +219,9 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
    }

    template<typename Rhs, typename Dest>
-    bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &_X) const
+    bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
    {
-      Dest& X(_X.derived());
+      Dest& X(X_base.derived());
      eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
      EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
                        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
@@ -229,8 +229,10 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
      // Permute the right hand side to form X = Pr*B
      // on return, X is overwritten by the computed solution
      X.resize(B.rows(),B.cols());
+
+      // this ugly const_cast_derived() helps to detect aliasing when applying the permutations
      for(Index j = 0; j < B.cols(); ++j)
-        X.col(j) = rowsPermutation() * B.col(j);
+        X.col(j) = rowsPermutation() * B.const_cast_derived().col(j);
      
      //Forward substitution with L
      this->matrixL().solveInPlace(X);
@@ -258,14 +260,13 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
      eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
      // Initialize with the determinant of the row matrix
      Scalar det = Scalar(1.);
-      //Note that the diagonal blocks of U are stored in supernodes,
+      // Note that the diagonal blocks of U are stored in supernodes,
      // which are available in the  L part :)
      for (Index j = 0; j < this->cols(); ++j)
      {
        for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
        {
-          if(it.row() < j) continue;
-          if(it.row() == j)
+          if(it.index() == j)
          {
            det *= (std::abs)(it.value());
            break;
--- a/Eigen/src/SparseLU/SparseLU_Memory.h
+++ b/Eigen/src/SparseLU/SparseLU_Memory.h
@@ -70,23 +70,30 @@ Index  SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index
  if(num_expansions == 0 || keep_prev) 
    new_len = length ; // First time allocate requested
  else 
-    new_len = Index(alpha * length);
+    new_len = (std::max)(length+1,Index(alpha * length));
  
  VectorType old_vec; // Temporary vector to hold the previous values   
  if (nbElts > 0 )
    old_vec = vec.segment(0,nbElts); 
  
  //Allocate or expand the current vector
-  try 
+#ifdef EIGEN_EXCEPTIONS
+  try
+#endif
  {
    vec.resize(new_len); 
  }
+#ifdef EIGEN_EXCEPTIONS
  catch(std::bad_alloc& )
+#else
+  if(!vec.size())
+#endif
  {
-    if ( !num_expansions )
+    if (!num_expansions)
    {
      // First time to allocate from LUMemInit()
-      throw; // Pass the exception to LUMemInit() which has a try... catch block
+      // Let LUMemInit() deals with it.
+      return -1;
    }
    if (keep_prev)
    {
@@ -100,12 +107,18 @@ Index  SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index
      do 
      {
        alpha = (alpha + 1)/2;
-        new_len = Index(alpha * length);
+        new_len = (std::max)(length+1,Index(alpha * length));
+#ifdef EIGEN_EXCEPTIONS
        try
+#endif
        {
          vec.resize(new_len); 
        }
+#ifdef EIGEN_EXCEPTIONS
        catch(std::bad_alloc& )
+#else
+        if (!vec.size())
+#endif
        {
          tries += 1; 
          if ( tries > 10) return new_len; 
@@ -139,10 +152,9 @@ template <typename Scalar, typename Index>
 Index SparseLUImpl<Scalar,Index>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size,  GlobalLU_t& glu)
 {
  Index& num_expansions = glu.num_expansions; //No memory expansions so far
-  num_expansions = 0; 
-  glu.nzumax = glu.nzlumax = (std::max)(fillratio * annz, m*n); // estimated number of nonzeros in U 
+  num_expansions = 0;
+  glu.nzumax = glu.nzlumax = (std::min)(fillratio * annz / n, m) * n; // estimated number of nonzeros in U 
  glu.nzlmax = (std::max)(Index(4), fillratio) * annz / 4; // estimated  nnz in L factor
-
  // Return the estimated size to the user if necessary
  Index tempSpace;
  tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
@@ -166,14 +178,10 @@ Index SparseLUImpl<Scalar,Index>::memInit(Index m, Index n, Index annz, Index lw
  // Reserve memory for L/U factors
  do 
  {
-    try
-    {
-      expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions); 
-      expand<ScalarVector>(glu.ucol,glu.nzumax, 0, 0, num_expansions); 
-      expand<IndexVector>(glu.lsub,glu.nzlmax, 0, 0, num_expansions); 
-      expand<IndexVector>(glu.usub,glu.nzumax, 0, 1, num_expansions); 
-    }
-    catch(std::bad_alloc& )
+    if(     (expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions)<0)
+        ||  (expand<ScalarVector>(glu.ucol,  glu.nzumax,  0, 0, num_expansions)<0)
+        ||  (expand<IndexVector> (glu.lsub,  glu.nzlmax,  0, 0, num_expansions)<0)
+        ||  (expand<IndexVector> (glu.usub,  glu.nzumax,  0, 1, num_expansions)<0) )
    {
      //Reduce the estimated size and retry
      glu.nzlumax /= 2;
@@ -181,10 +189,7 @@ Index SparseLUImpl<Scalar,Index>::memInit(Index m, Index n, Index annz, Index lw
      glu.nzlmax /= 2;
      if (glu.nzlumax < annz ) return glu.nzlumax; 
    }
-    
  } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size());
-
-  
  
  ++num_expansions;
  return 0;
--- a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
+++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
@@ -189,8 +189,8 @@ class MappedSuperNodalMatrix<Scalar,Index>::InnerIterator
        m_idval(mat.colIndexPtr()[outer]),
        m_startidval(m_idval),
        m_endidval(mat.colIndexPtr()[outer+1]),
-        m_idrow(mat.rowIndexPtr()[outer]),
-        m_endidrow(mat.rowIndexPtr()[outer+1])
+        m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]),
+        m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1])
    {}
    inline InnerIterator& operator++()
    { 
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2012-2013 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
-// Copyright (C) 2012-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -58,6 +58,7 @@ namespace internal {
  * \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module 
  *  OrderingMethods \endlink module for the list of built-in and external ordering methods.
  * 
+  * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()).
  * 
  */
 template<typename _MatrixType, typename _OrderingType>
@@ -74,13 +75,26 @@ class SparseQR
    typedef Matrix<Scalar, Dynamic, 1> ScalarVector;
    typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
  public:
-    SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false)
+    SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
    { }
    
-    SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false)
+    /** Construct a QR factorization of the matrix \a mat.
+      * 
+      * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()).
+      * 
+      * \sa compute()
+      */
+    SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
    {
      compute(mat);
    }
+    
+    /** Computes the QR factorization of the sparse matrix \a mat.
+      * 
+      * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()).
+      * 
+      * \sa analyzePattern(), factorize()
+      */
    void compute(const MatrixType& mat)
    {
      analyzePattern(mat);
@@ -161,11 +175,12 @@ class SparseQR
      b = y;
      
      // Solve with the triangular matrix R
+      y.resize((std::max)(cols(),Index(y.rows())),y.cols());
      y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank));
-      y.bottomRows(y.size()-rank).setZero();
+      y.bottomRows(y.rows()-rank).setZero();

      // Apply the column permutation
-      if (m_perm_c.size())  dest.topRows(cols()) = colsPermutation() * y.topRows(cols());
+      if (m_perm_c.size())  dest = colsPermutation() * y.topRows(cols());
      else                  dest = y.topRows(cols());
      
      m_info = Success;
@@ -205,7 +220,7 @@ class SparseQR
    
    /** \brief Reports whether previous computation was successful.
      *
-      * \returns \c Success if computation was succesful,
+      * \returns \c Success if computation was successful,
      *          \c NumericalIssue if the QR factorization reports a numerical problem
      *          \c InvalidInput if the input matrix is invalid
      *
@@ -246,7 +261,8 @@ class SparseQR
    Index m_nonzeropivots;          // Number of non zero pivots found 
    IndexVector m_etree;            // Column elimination tree
    IndexVector m_firstRowElt;      // First element in each row
-    bool m_isQSorted;                 // whether Q is sorted or not
+    bool m_isQSorted;               // whether Q is sorted or not
+    bool m_isEtreeOk;               // whether the elimination tree match the initial input matrix
    
    template <typename, typename > friend struct SparseQR_QProduct;
    template <typename > friend struct SparseQRMatrixQReturnType;
@@ -254,20 +270,26 @@ class SparseQR
 };

 /** \brief Preprocessing step of a QR factorization 
+  * 
+  * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()).
  * 
  * In this step, the fill-reducing permutation is computed and applied to the columns of A
-  * and the column elimination tree is computed as well. Only the sparcity pattern of \a mat is exploited.
+  * and the column elimination tree is computed as well. Only the sparsity pattern of \a mat is exploited.
  * 
  * \note In this step it is assumed that there is no empty row in the matrix \a mat.
  */
 template <typename MatrixType, typename OrderingType>
 void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
 {
+  eigen_assert(mat.isCompressed() && "SparseQR requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to SparseQR");
+  // Copy to a column major matrix if the input is rowmajor
+  typename internal::conditional<MatrixType::IsRowMajor,QRMatrixType,const MatrixType&>::type matCpy(mat);
  // Compute the column fill reducing ordering
  OrderingType ord; 
-  ord(mat, m_perm_c); 
+  ord(matCpy, m_perm_c); 
  Index n = mat.cols();
  Index m = mat.rows();
+  Index diagSize = (std::min)(m,n);
  
  if (!m_perm_c.size())
  {
@@ -277,22 +299,23 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
  
  // Compute the column elimination tree of the permuted matrix
  m_outputPerm_c = m_perm_c.inverse();
-  internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
+  internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
+  m_isEtreeOk = true;
  
-  m_R.resize(n, n);
-  m_Q.resize(m, n);
+  m_R.resize(m, n);
+  m_Q.resize(m, diagSize);
  
  // Allocate space for nonzero elements : rough estimation
  m_R.reserve(2*mat.nonZeros()); //FIXME Get a more accurate estimation through symbolic factorization with the etree
  m_Q.reserve(2*mat.nonZeros());
-  m_hcoeffs.resize(n);
+  m_hcoeffs.resize(diagSize);
  m_analysisIsok = true;
 }

 /** \brief Performs the numerical QR factorization of the input matrix
  * 
  * The function SparseQR::analyzePattern(const MatrixType&) must have been called beforehand with
-  * a matrix having the same sparcity pattern than \a mat.
+  * a matrix having the same sparsity pattern than \a mat.
  * 
  * \param mat The sparse column-major matrix
  */
@@ -305,23 +328,47 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
  eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step");
  Index m = mat.rows();
  Index n = mat.cols();
-  IndexVector mark(m); mark.setConstant(-1);  // Record the visited nodes
-  IndexVector Ridx(n), Qidx(m);               // Store temporarily the row indexes for the current column of R and Q
-  Index nzcolR, nzcolQ;                       // Number of nonzero for the current column of R and Q
-  ScalarVector tval(m);                       // The dense vector used to compute the current column
-  bool found_diag;
-    
+  Index diagSize = (std::min)(m,n);
+  IndexVector mark((std::max)(m,n)); mark.setConstant(-1);  // Record the visited nodes
+  IndexVector Ridx(n), Qidx(m);                             // Store temporarily the row indexes for the current column of R and Q
+  Index nzcolR, nzcolQ;                                     // Number of nonzero for the current column of R and Q
+  ScalarVector tval(m);                                     // The dense vector used to compute the current column
+  RealScalar pivotThreshold = m_threshold;
+  
+  m_R.setZero();
+  m_Q.setZero();
  m_pmat = mat;
-  m_pmat.uncompress(); // To have the innerNonZeroPtr allocated
-  // Apply the fill-in reducing permutation lazily:
-  for (int i = 0; i < n; i++)
+  if(!m_isEtreeOk)
  {
-    Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
-    m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i]; 
-    m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i]; 
+    m_outputPerm_c = m_perm_c.inverse();
+    internal::coletree(m_pmat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
+    m_isEtreeOk = true;
+  }
+
+  m_pmat.uncompress(); // To have the innerNonZeroPtr allocated
+  
+  // Apply the fill-in reducing permutation lazily:
+  {
+    // If the input is row major, copy the original column indices,
+    // otherwise directly use the input matrix
+    // 
+    IndexVector originalOuterIndicesCpy;
+    const Index *originalOuterIndices = mat.outerIndexPtr();
+    if(MatrixType::IsRowMajor)
+    {
+      originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1);
+      originalOuterIndices = originalOuterIndicesCpy.data();
+    }
+    
+    for (int i = 0; i < n; i++)
+    {
+      Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
+      m_pmat.outerIndexPtr()[p] = originalOuterIndices[i]; 
+      m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i]; 
+    }
  }
  
-  /* Compute the default threshold, see : 
+  /* Compute the default threshold as in MatLab, see:
   * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing
   * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3 
   */
@@ -329,33 +376,35 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
  {
    RealScalar max2Norm = 0.0;
    for (int j = 0; j < n; j++) max2Norm = (max)(max2Norm, m_pmat.col(j).norm());
-    m_threshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon();
+    if(max2Norm==RealScalar(0))
+      max2Norm = RealScalar(1);
+    pivotThreshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon();
  }
  
  // Initialize the numerical permutation
  m_pivotperm.setIdentity(n);
  
  Index nonzeroCol = 0; // Record the number of valid pivots
-  
+  m_Q.startVec(0);
+
  // Left looking rank-revealing QR factorization: compute a column of R and Q at a time
  for (Index col = 0; col < n; ++col)
  {
    mark.setConstant(-1);
    m_R.startVec(col);
-    m_Q.startVec(col);
    mark(nonzeroCol) = col;
    Qidx(0) = nonzeroCol;
    nzcolR = 0; nzcolQ = 1;
-    found_diag = false;
+    bool found_diag = nonzeroCol>=m;
    tval.setZero(); 
    
    // Symbolic factorization: find the nonzero locations of the column k of the factors R and Q, i.e.,
    // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k.
    // Note: if the diagonal entry does not exist, then its contribution must be explicitly added,
    // thus the trick with found_diag that permits to do one more iteration on the diagonal element if this one has not been found.
-    for (typename MatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
+    for (typename QRMatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
    {
-      Index curIdx = nonzeroCol ;
+      Index curIdx = nonzeroCol;
      if(itp) curIdx = itp.row();
      if(curIdx == nonzeroCol) found_diag = true;
      
@@ -397,7 +446,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
    // Browse all the indexes of R(:,col) in reverse order
    for (Index i = nzcolR-1; i >= 0; i--)
    {
-      Index curIdx = m_pivotperm.indices()(Ridx(i));
+      Index curIdx = Ridx(i);
      
      // Apply the curIdx-th householder vector to the current column (temporarily stored into tval)
      Scalar tdot(0);
@@ -426,33 +475,36 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
        }
      }
    } // End update current column
-        
-    // Compute the Householder reflection that eliminate the current column
-    // FIXME this step should call the Householder module.
-    Scalar tau;
-    RealScalar beta;
-    Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);
    
-    // First, the squared norm of Q((col+1):m, col)
-    RealScalar sqrNorm = 0.;
-    for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq)));
+    Scalar tau = 0;
+    RealScalar beta = 0;
    
-    if(sqrNorm == RealScalar(0) && numext::imag(c0) == RealScalar(0))
+    if(nonzeroCol < diagSize)
    {
-      tau = RealScalar(0);
-      beta = numext::real(c0);
-      tval(Qidx(0)) = 1;
-     }
-    else
-    {
-      beta = std::sqrt(numext::abs2(c0) + sqrNorm);
-      if(numext::real(c0) >= RealScalar(0))
-        beta = -beta;
-      tval(Qidx(0)) = 1;
-      for (Index itq = 1; itq < nzcolQ; ++itq)
-        tval(Qidx(itq)) /= (c0 - beta);
-      tau = numext::conj((beta-c0) / beta);
-        
+      // Compute the Householder reflection that eliminate the current column
+      // FIXME this step should call the Householder module.
+      Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);
+      
+      // First, the squared norm of Q((col+1):m, col)
+      RealScalar sqrNorm = 0.;
+      for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq)));
+      if(sqrNorm == RealScalar(0) && numext::imag(c0) == RealScalar(0))
+      {
+        beta = numext::real(c0);
+        tval(Qidx(0)) = 1;
+      }
+      else
+      {
+        using std::sqrt;
+        beta = sqrt(numext::abs2(c0) + sqrNorm);
+        if(numext::real(c0) >= RealScalar(0))
+          beta = -beta;
+        tval(Qidx(0)) = 1;
+        for (Index itq = 1; itq < nzcolQ; ++itq)
+          tval(Qidx(itq)) /= (c0 - beta);
+        tau = numext::conj((beta-c0) / beta);
+          
+      }
    }

    // Insert values in R
@@ -466,45 +518,49 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
      }
    }

-    if(abs(beta) >= m_threshold)
+    if(nonzeroCol < diagSize && abs(beta) >= pivotThreshold)
    {
      m_R.insertBackByOuterInner(col, nonzeroCol) = beta;
-      nonzeroCol++;
      // The householder coefficient
-      m_hcoeffs(col) = tau;
+      m_hcoeffs(nonzeroCol) = tau;
      // Record the householder reflections
      for (Index itq = 0; itq < nzcolQ; ++itq)
      {
        Index iQ = Qidx(itq);
-        m_Q.insertBackByOuterInnerUnordered(col,iQ) = tval(iQ);
+        m_Q.insertBackByOuterInnerUnordered(nonzeroCol,iQ) = tval(iQ);
        tval(iQ) = Scalar(0.);
-      }    
+      }
+      nonzeroCol++;
+      if(nonzeroCol<diagSize)
+        m_Q.startVec(nonzeroCol);
    }
    else
    {
      // Zero pivot found: move implicitly this column to the end
-      m_hcoeffs(col) = Scalar(0);
      for (Index j = nonzeroCol; j < n-1; j++) 
        std::swap(m_pivotperm.indices()(j), m_pivotperm.indices()[j+1]);
      
      // Recompute the column elimination tree
      internal::coletree(m_pmat, m_etree, m_firstRowElt, m_pivotperm.indices().data());
+      m_isEtreeOk = false;
    }
  }
  
+  m_hcoeffs.tail(diagSize-nonzeroCol).setZero();
+  
  // Finalize the column pointers of the sparse matrices R and Q
  m_Q.finalize();
  m_Q.makeCompressed();
  m_R.finalize();
  m_R.makeCompressed();
  m_isQSorted = false;
-  
+
  m_nonzeropivots = nonzeroCol;
  
  if(nonzeroCol<n)
  {
    // Permute the triangular factor to put the 'dead' columns to the end
-    MatrixType tempR(m_R);
+    QRMatrixType tempR(m_R);
    m_R = tempR * m_pivotperm;
    
    // Update the column permutation
@@ -560,17 +616,20 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
  template<typename DesType>
  void evalTo(DesType& res) const
  {
+    Index m = m_qr.rows();
    Index n = m_qr.cols();
+    Index diagSize = (std::min)(m,n);
    res = m_other;
    if (m_transpose)
    {
      eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes");
      //Compute res = Q' * other column by column
      for(Index j = 0; j < res.cols(); j++){
-        for (Index k = 0; k < n; k++)
+        for (Index k = 0; k < diagSize; k++)
        {
          Scalar tau = Scalar(0);
          tau = m_qr.m_Q.col(k).dot(res.col(j));
+          if(tau==Scalar(0)) continue;
          tau = tau * m_qr.m_hcoeffs(k);
          res.col(j) -= tau * m_qr.m_Q.col(k);
        }
@@ -578,14 +637,15 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
    }
    else
    {
-      eigen_assert(m_qr.m_Q.cols() == m_other.rows() && "Non conforming object sizes");
-      // Compute res = Q' * other column by column
+      eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes");
+      // Compute res = Q * other column by column
      for(Index j = 0; j < res.cols(); j++)
      {
-        for (Index k = n-1; k >=0; k--)
+        for (Index k = diagSize-1; k >=0; k--)
        {
          Scalar tau = Scalar(0);
          tau = m_qr.m_Q.col(k).dot(res.col(j));
+          if(tau==Scalar(0)) continue;
          tau = tau * m_qr.m_hcoeffs(k);
          res.col(j) -= tau * m_qr.m_Q.col(k);
        }
@@ -615,7 +675,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
    return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
  }
  inline Index rows() const { return m_qr.rows(); }
-  inline Index cols() const { return m_qr.cols(); }
+  inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); }
  // To use for operations with the transpose of Q
  SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const
  {
--- a/Eigen/src/StlSupport/StdDeque.h
+++ b/Eigen/src/StlSupport/StdDeque.h
@@ -11,7 +11,7 @@
 #ifndef EIGEN_STDDEQUE_H
 #define EIGEN_STDDEQUE_H

-#include "Eigen/src/StlSupport/details.h"
+#include "details.h"

 // Define the explicit instantiation (e.g. necessary for the Intel compiler)
 #if defined(__INTEL_COMPILER) || defined(__GNUC__)
--- a/Eigen/src/StlSupport/StdList.h
+++ b/Eigen/src/StlSupport/StdList.h
@@ -10,7 +10,7 @@
 #ifndef EIGEN_STDLIST_H
 #define EIGEN_STDLIST_H

-#include "Eigen/src/StlSupport/details.h"
+#include "details.h"

 // Define the explicit instantiation (e.g. necessary for the Intel compiler)
 #if defined(__INTEL_COMPILER) || defined(__GNUC__)
--- a/Eigen/src/StlSupport/StdVector.h
+++ b/Eigen/src/StlSupport/StdVector.h
@@ -11,7 +11,7 @@
 #ifndef EIGEN_STDVECTOR_H
 #define EIGEN_STDVECTOR_H

-#include "Eigen/src/StlSupport/details.h"
+#include "details.h"

 /**
 * This section contains a convenience MACRO which allows an easy specialization of
--- a/Eigen/src/UmfPackSupport/UmfPackSupport.h
+++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h
@@ -107,6 +107,16 @@ inline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N
  return umfpack_zi_get_determinant(&mx_real,0,Ex,NumericHandle,User_Info);
 }

+namespace internal {
+  template<typename T> struct umfpack_helper_is_sparse_plain : false_type {};
+  template<typename Scalar, int Options, typename StorageIndex>
+  struct umfpack_helper_is_sparse_plain<SparseMatrix<Scalar,Options,StorageIndex> >
+    : true_type {};
+  template<typename Scalar, int Options, typename StorageIndex>
+  struct umfpack_helper_is_sparse_plain<MappedSparseMatrix<Scalar,Options,StorageIndex> >
+    : true_type {};
+}
+
 /** \ingroup UmfPackSupport_Module
  * \brief A sparse LU factorization and solver based on UmfPack
  *
@@ -192,10 +202,14 @@ class UmfPackLU : internal::noncopyable
     *  Note that the matrix should be column-major, and in compressed format for best performance.
     *  \sa SparseMatrix::makeCompressed().
     */
-    void compute(const MatrixType& matrix)
+    template<typename InputMatrixType>
+    void compute(const InputMatrixType& matrix)
    {
-      analyzePattern(matrix);
-      factorize(matrix);
+      if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar());
+      if(m_numeric)  umfpack_free_numeric(&m_numeric,Scalar());
+      grapInput(matrix.derived());
+      analyzePattern_impl();
+      factorize_impl();
    }

    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
@@ -230,23 +244,15 @@ class UmfPackLU : internal::noncopyable
      *
      * \sa factorize(), compute()
      */
-    void analyzePattern(const MatrixType& matrix)
+    template<typename InputMatrixType>
+    void analyzePattern(const InputMatrixType& matrix)
    {
-      if(m_symbolic)
-        umfpack_free_symbolic(&m_symbolic,Scalar());
-      if(m_numeric)
-        umfpack_free_numeric(&m_numeric,Scalar());
+      if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar());
+      if(m_numeric)  umfpack_free_numeric(&m_numeric,Scalar());
      
-      grapInput(matrix);
+      grapInput(matrix.derived());

-      int errorCode = 0;
-      errorCode = umfpack_symbolic(matrix.rows(), matrix.cols(), m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
-                                   &m_symbolic, 0, 0);
-
-      m_isInitialized = true;
-      m_info = errorCode ? InvalidInput : Success;
-      m_analysisIsOk = true;
-      m_factorizationIsOk = false;
+      analyzePattern_impl();
    }

    /** Performs a numeric decomposition of \a matrix
@@ -255,20 +261,16 @@ class UmfPackLU : internal::noncopyable
      *
      * \sa analyzePattern(), compute()
      */
-    void factorize(const MatrixType& matrix)
+    template<typename InputMatrixType>
+    void factorize(const InputMatrixType& matrix)
    {
      eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()");
      if(m_numeric)
        umfpack_free_numeric(&m_numeric,Scalar());

-      grapInput(matrix);
-
-      int errorCode;
-      errorCode = umfpack_numeric(m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
-                                  m_symbolic, &m_numeric, 0, 0);
-
-      m_info = errorCode ? NumericalIssue : Success;
-      m_factorizationIsOk = true;
+      grapInput(matrix.derived());
+      
+      factorize_impl();
    }

    #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -283,19 +285,20 @@ class UmfPackLU : internal::noncopyable

  protected:

-
    void init()
    {
-      m_info = InvalidInput;
-      m_isInitialized = false;
-      m_numeric = 0;
-      m_symbolic = 0;
-      m_outerIndexPtr = 0;
-      m_innerIndexPtr = 0;
-      m_valuePtr      = 0;
+      m_info                  = InvalidInput;
+      m_isInitialized         = false;
+      m_numeric               = 0;
+      m_symbolic              = 0;
+      m_outerIndexPtr         = 0;
+      m_innerIndexPtr         = 0;
+      m_valuePtr              = 0;
+      m_extractedDataAreDirty = true;
    }
    
-    void grapInput(const MatrixType& mat)
+    template<typename InputMatrixType>
+    void grapInput_impl(const InputMatrixType& mat, internal::true_type)
    {
      m_copyMatrix.resize(mat.rows(), mat.cols());
      if( ((MatrixType::Flags&RowMajorBit)==RowMajorBit) || sizeof(typename MatrixType::Index)!=sizeof(int) || !mat.isCompressed() )
@@ -313,6 +316,45 @@ class UmfPackLU : internal::noncopyable
        m_valuePtr      = mat.valuePtr();
      }
    }
+    
+    template<typename InputMatrixType>
+    void grapInput_impl(const InputMatrixType& mat, internal::false_type)
+    {
+      m_copyMatrix = mat;
+      m_outerIndexPtr = m_copyMatrix.outerIndexPtr();
+      m_innerIndexPtr = m_copyMatrix.innerIndexPtr();
+      m_valuePtr      = m_copyMatrix.valuePtr();
+    }
+    
+    template<typename InputMatrixType>
+    void grapInput(const InputMatrixType& mat)
+    {
+      grapInput_impl(mat, internal::umfpack_helper_is_sparse_plain<InputMatrixType>());
+    }
+    
+    void analyzePattern_impl()
+    {
+      int errorCode = 0;
+      errorCode = umfpack_symbolic(m_copyMatrix.rows(), m_copyMatrix.cols(), m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
+                                   &m_symbolic, 0, 0);
+
+      m_isInitialized = true;
+      m_info = errorCode ? InvalidInput : Success;
+      m_analysisIsOk = true;
+      m_factorizationIsOk = false;
+      m_extractedDataAreDirty = true;
+    }
+    
+    void factorize_impl()
+    {
+      int errorCode;
+      errorCode = umfpack_numeric(m_outerIndexPtr, m_innerIndexPtr, m_valuePtr,
+                                  m_symbolic, &m_numeric, 0, 0);
+
+      m_info = errorCode ? NumericalIssue : Success;
+      m_factorizationIsOk = true;
+      m_extractedDataAreDirty = true;
+    }

    // cached data to reduce reallocation, etc.
    mutable LUMatrixType m_l;
--- a/Eigen/src/plugins/BlockMethods.h
+++ b/Eigen/src/plugins/BlockMethods.h
@@ -113,13 +113,13 @@ inline const Block<const Derived, CRows, CCols> topRightCorner() const

 /** \returns an expression of a top-right corner of *this.
  *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
  *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
  * information should not contradict. In other words, \a cRows should equal \a CRows unless
  * \a CRows is \a Dynamic, and the same for the number of columns.
  *
@@ -188,13 +188,13 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner() const

 /** \returns an expression of a top-left corner of *this.
  *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
  *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
  * information should not contradict. In other words, \a cRows should equal \a CRows unless
  * \a CRows is \a Dynamic, and the same for the number of columns.
  *
@@ -263,13 +263,13 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner() const

 /** \returns an expression of a bottom-right corner of *this.
  *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
  *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
  * information should not contradict. In other words, \a cRows should equal \a CRows unless
  * \a CRows is \a Dynamic, and the same for the number of columns.
  *
@@ -338,13 +338,13 @@ inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const

 /** \returns an expression of a bottom-left corner of *this.
  *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
  *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
  * information should not contradict. In other words, \a cRows should equal \a CRows unless
  * \a CRows is \a Dynamic, and the same for the number of columns.
  *
@@ -390,7 +390,11 @@ inline ConstRowsBlockXpr topRows(Index n) const

 /** \returns a block consisting of the top rows of *this.
  *
-  * \tparam N the number of rows in the block
+  * \tparam N the number of rows in the block as specified at compile-time
+  * \param n the number of rows in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include MatrixBase_template_int_topRows.cpp
  * Output: \verbinclude MatrixBase_template_int_topRows.out
@@ -398,16 +402,16 @@ inline ConstRowsBlockXpr topRows(Index n) const
  * \sa class Block, block(Index,Index,Index,Index)
  */
 template<int N>
-inline typename NRowsBlockXpr<N>::Type topRows()
+inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
 {
-  return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
+  return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
 }

 /** This is the const version of topRows<int>().*/
 template<int N>
-inline typename ConstNRowsBlockXpr<N>::Type topRows() const
+inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
 {
-  return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
 }


@@ -434,7 +438,11 @@ inline ConstRowsBlockXpr bottomRows(Index n) const

 /** \returns a block consisting of the bottom rows of *this.
  *
-  * \tparam N the number of rows in the block
+  * \tparam N the number of rows in the block as specified at compile-time
+  * \param n the number of rows in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include MatrixBase_template_int_bottomRows.cpp
  * Output: \verbinclude MatrixBase_template_int_bottomRows.out
@@ -442,16 +450,16 @@ inline ConstRowsBlockXpr bottomRows(Index n) const
  * \sa class Block, block(Index,Index,Index,Index)
  */
 template<int N>
-inline typename NRowsBlockXpr<N>::Type bottomRows()
+inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
 {
-  return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
+  return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
 }

 /** This is the const version of bottomRows<int>().*/
 template<int N>
-inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
+inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
 {
-  return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
 }


@@ -459,28 +467,32 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
 /** \returns a block consisting of a range of rows of *this.
  *
  * \param startRow the index of the first row in the block
-  * \param numRows the number of rows in the block
+  * \param n the number of rows in the block
  *
  * Example: \include DenseBase_middleRows_int.cpp
  * Output: \verbinclude DenseBase_middleRows_int.out
  *
  * \sa class Block, block(Index,Index,Index,Index)
  */
-inline RowsBlockXpr middleRows(Index startRow, Index numRows)
+inline RowsBlockXpr middleRows(Index startRow, Index n)
 {
-  return RowsBlockXpr(derived(), startRow, 0, numRows, cols());
+  return RowsBlockXpr(derived(), startRow, 0, n, cols());
 }

 /** This is the const version of middleRows(Index,Index).*/
-inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
+inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
 {
-  return ConstRowsBlockXpr(derived(), startRow, 0, numRows, cols());
+  return ConstRowsBlockXpr(derived(), startRow, 0, n, cols());
 }

 /** \returns a block consisting of a range of rows of *this.
  *
-  * \tparam N the number of rows in the block
+  * \tparam N the number of rows in the block as specified at compile-time
  * \param startRow the index of the first row in the block
+  * \param n the number of rows in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include DenseBase_template_int_middleRows.cpp
  * Output: \verbinclude DenseBase_template_int_middleRows.out
@@ -488,16 +500,16 @@ inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
  * \sa class Block, block(Index,Index,Index,Index)
  */
 template<int N>
-inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow)
+inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
 {
-  return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
+  return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
 }

 /** This is the const version of middleRows<int>().*/
 template<int N>
-inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow) const
+inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
 {
-  return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
 }


@@ -524,7 +536,11 @@ inline ConstColsBlockXpr leftCols(Index n) const

 /** \returns a block consisting of the left columns of *this.
  *
-  * \tparam N the number of columns in the block
+  * \tparam N the number of columns in the block as specified at compile-time
+  * \param n the number of columns in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include MatrixBase_template_int_leftCols.cpp
  * Output: \verbinclude MatrixBase_template_int_leftCols.out
@@ -532,16 +548,16 @@ inline ConstColsBlockXpr leftCols(Index n) const
  * \sa class Block, block(Index,Index,Index,Index)
  */
 template<int N>
-inline typename NColsBlockXpr<N>::Type leftCols()
+inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
 {
-  return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
+  return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
 }

 /** This is the const version of leftCols<int>().*/
 template<int N>
-inline typename ConstNColsBlockXpr<N>::Type leftCols() const
+inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
 {
-  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
 }


@@ -568,7 +584,11 @@ inline ConstColsBlockXpr rightCols(Index n) const

 /** \returns a block consisting of the right columns of *this.
  *
-  * \tparam N the number of columns in the block
+  * \tparam N the number of columns in the block as specified at compile-time
+  * \param n the number of columns in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include MatrixBase_template_int_rightCols.cpp
  * Output: \verbinclude MatrixBase_template_int_rightCols.out
@@ -576,16 +596,16 @@ inline ConstColsBlockXpr rightCols(Index n) const
  * \sa class Block, block(Index,Index,Index,Index)
  */
 template<int N>
-inline typename NColsBlockXpr<N>::Type rightCols()
+inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
 {
-  return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
+  return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
 }

 /** This is the const version of rightCols<int>().*/
 template<int N>
-inline typename ConstNColsBlockXpr<N>::Type rightCols() const
+inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
 {
-  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
 }


@@ -613,8 +633,12 @@ inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const

 /** \returns a block consisting of a range of columns of *this.
  *
-  * \tparam N the number of columns in the block
+  * \tparam N the number of columns in the block as specified at compile-time
  * \param startCol the index of the first column in the block
+  * \param n the number of columns in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include DenseBase_template_int_middleCols.cpp
  * Output: \verbinclude DenseBase_template_int_middleCols.out
@@ -622,16 +646,16 @@ inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
  * \sa class Block, block(Index,Index,Index,Index)
  */
 template<int N>
-inline typename NColsBlockXpr<N>::Type middleCols(Index startCol)
+inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
 {
-  return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
+  return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
 }

 /** This is the const version of middleCols<int>().*/
 template<int N>
-inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol) const
+inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
 {
-  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
 }


@@ -667,15 +691,15 @@ inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, In

 /** \returns an expression of a block in *this.
  *
-  * \tparam BlockRows number of rows in block as specified at compile time
-  * \tparam BlockCols number of columns in block as specified at compile time
+  * \tparam BlockRows number of rows in block as specified at compile-time
+  * \tparam BlockCols number of columns in block as specified at compile-time
  * \param  startRow  the first row in the block
  * \param  startCol  the first column in the block
-  * \param  blockRows number of rows in block as specified at run time
-  * \param  blockCols number of columns in block as specified at run time
+  * \param  blockRows number of rows in block as specified at run-time
+  * \param  blockCols number of columns in block as specified at run-time
  *
-  * This function is mainly useful for blocks where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for blocks where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
  * information should not contradict. In other words, \a blockRows should equal \a BlockRows unless
  * \a BlockRows is \a Dynamic, and the same for the number of columns.
  *
@@ -738,7 +762,7 @@ inline ConstRowXpr row(Index i) const
  * \only_for_vectors
  *
  * \param start the first coefficient in the segment
-  * \param vecSize the number of coefficients in the segment
+  * \param n the number of coefficients in the segment
  *
  * Example: \include MatrixBase_segment_int_int.cpp
  * Output: \verbinclude MatrixBase_segment_int_int.out
@@ -749,25 +773,25 @@ inline ConstRowXpr row(Index i) const
  *
  * \sa class Block, segment(Index)
  */
-inline SegmentReturnType segment(Index start, Index vecSize)
+inline SegmentReturnType segment(Index start, Index n)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return SegmentReturnType(derived(), start, vecSize);
+  return SegmentReturnType(derived(), start, n);
 }


 /** This is the const version of segment(Index,Index).*/
-inline ConstSegmentReturnType segment(Index start, Index vecSize) const
+inline ConstSegmentReturnType segment(Index start, Index n) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return ConstSegmentReturnType(derived(), start, vecSize);
+  return ConstSegmentReturnType(derived(), start, n);
 }

 /** \returns a dynamic-size expression of the first coefficients of *this.
  *
  * \only_for_vectors
  *
-  * \param vecSize the number of coefficients in the block
+  * \param n the number of coefficients in the segment
  *
  * Example: \include MatrixBase_start_int.cpp
  * Output: \verbinclude MatrixBase_start_int.out
@@ -778,25 +802,24 @@ inline ConstSegmentReturnType segment(Index start, Index vecSize) const
  *
  * \sa class Block, block(Index,Index)
  */
-inline SegmentReturnType head(Index vecSize)
+inline SegmentReturnType head(Index n)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return SegmentReturnType(derived(), 0, vecSize);
+  return SegmentReturnType(derived(), 0, n);
 }

 /** This is the const version of head(Index).*/
-inline ConstSegmentReturnType
-  head(Index vecSize) const
+inline ConstSegmentReturnType head(Index n) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return ConstSegmentReturnType(derived(), 0, vecSize);
+  return ConstSegmentReturnType(derived(), 0, n);
 }

 /** \returns a dynamic-size expression of the last coefficients of *this.
  *
  * \only_for_vectors
  *
-  * \param vecSize the number of coefficients in the block
+  * \param n the number of coefficients in the segment
  *
  * Example: \include MatrixBase_end_int.cpp
  * Output: \verbinclude MatrixBase_end_int.out
@@ -807,95 +830,106 @@ inline ConstSegmentReturnType
  *
  * \sa class Block, block(Index,Index)
  */
-inline SegmentReturnType tail(Index vecSize)
+inline SegmentReturnType tail(Index n)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return SegmentReturnType(derived(), this->size() - vecSize, vecSize);
+  return SegmentReturnType(derived(), this->size() - n, n);
 }

 /** This is the const version of tail(Index).*/
-inline ConstSegmentReturnType tail(Index vecSize) const
+inline ConstSegmentReturnType tail(Index n) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return ConstSegmentReturnType(derived(), this->size() - vecSize, vecSize);
+  return ConstSegmentReturnType(derived(), this->size() - n, n);
 }

 /** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
  *
  * \only_for_vectors
  *
-  * The template parameter \a Size is the number of coefficients in the block
+  * \tparam N the number of coefficients in the segment as specified at compile-time
+  * \param start the index of the first element in the segment
+  * \param n the number of coefficients in the segment as specified at compile-time
  *
-  * \param start the index of the first element of the sub-vector
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include MatrixBase_template_int_segment.cpp
  * Output: \verbinclude MatrixBase_template_int_segment.out
  *
  * \sa class Block
  */
-template<int Size>
-inline typename FixedSegmentReturnType<Size>::Type segment(Index start)
+template<int N>
+inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename FixedSegmentReturnType<Size>::Type(derived(), start);
+  return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
 }

 /** This is the const version of segment<int>(Index).*/
-template<int Size>
-inline typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) const
+template<int N>
+inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename ConstFixedSegmentReturnType<Size>::Type(derived(), start);
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
 }

 /** \returns a fixed-size expression of the first coefficients of *this.
  *
  * \only_for_vectors
  *
-  * The template parameter \a Size is the number of coefficients in the block
+  * \tparam N the number of coefficients in the segment as specified at compile-time
+  * \param  n the number of coefficients in the segment as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include MatrixBase_template_int_start.cpp
  * Output: \verbinclude MatrixBase_template_int_start.out
  *
  * \sa class Block
  */
-template<int Size>
-inline typename FixedSegmentReturnType<Size>::Type head()
+template<int N>
+inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename FixedSegmentReturnType<Size>::Type(derived(), 0);
+  return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
 }

 /** This is the const version of head<int>().*/
-template<int Size>
-inline typename ConstFixedSegmentReturnType<Size>::Type head() const
+template<int N>
+inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename ConstFixedSegmentReturnType<Size>::Type(derived(), 0);
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
 }

 /** \returns a fixed-size expression of the last coefficients of *this.
  *
  * \only_for_vectors
  *
-  * The template parameter \a Size is the number of coefficients in the block
+  * \tparam N the number of coefficients in the segment as specified at compile-time
+  * \param  n the number of coefficients in the segment as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
  *
  * Example: \include MatrixBase_template_int_end.cpp
  * Output: \verbinclude MatrixBase_template_int_end.out
  *
  * \sa class Block
  */
-template<int Size>
-inline typename FixedSegmentReturnType<Size>::Type tail()
+template<int N>
+inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename FixedSegmentReturnType<Size>::Type(derived(), size() - Size);
+  return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
 }

 /** This is the const version of tail<int>.*/
-template<int Size>
-inline typename ConstFixedSegmentReturnType<Size>::Type tail() const
+template<int N>
+inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename ConstFixedSegmentReturnType<Size>::Type(derived(), size() - Size);
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
 }
--- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h
@@ -83,7 +83,7 @@ cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const ConstantReturnType>
 cwiseMin(const Scalar &other) const
 {
-  return cwiseMin(Derived::PlainObject::Constant(rows(), cols(), other));
+  return cwiseMin(Derived::Constant(rows(), cols(), other));
 }

 /** \returns an expression of the coefficient-wise max of *this and \a other
@@ -107,7 +107,7 @@ cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const ConstantReturnType>
 cwiseMax(const Scalar &other) const
 {
-  return cwiseMax(Derived::PlainObject::Constant(rows(), cols(), other));
+  return cwiseMax(Derived::Constant(rows(), cols(), other));
 }


--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt
@@ -7,6 +7,9 @@ workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS)

 if(EIGEN_Fortran_COMPILER_WORKS)
  enable_language(Fortran OPTIONAL)
+  if(NOT CMAKE_Fortran_COMPILER)
+    set(EIGEN_Fortran_COMPILER_WORKS OFF)
+  endif()
 endif()

 add_custom_target(blas)
--- a/blas/README.txt
+++ b/blas/README.txt
@@ -1,9 +1,6 @@

 This directory contains a BLAS library built on top of Eigen.

-This is currently a work in progress which is far to be ready for use,
-but feel free to contribute to it if you wish.
-
 This module is not built by default. In order to compile it, you need to
 type 'make blas' from within your build dir.

--- a/cmake/EigenConfigureTesting.cmake
+++ b/cmake/EigenConfigureTesting.cmake
@@ -41,7 +41,7 @@ endif()

 # copy ctest properties, which currently
 # o raise the warning levels
-configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
+configure_file(${CMAKE_CURRENT_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)

 # restore default CMAKE_MAKE_PROGRAM
 set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
@@ -50,7 +50,7 @@ set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
 set(CMAKE_MAKE_PROGRAM_SAVE) 
 set(EIGEN_MAKECOMMAND_PLACEHOLDER)

-configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)

 # some documentation of this function would be nice
 ei_init_testing()
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -452,20 +452,12 @@ macro(ei_set_build_string)
 endmacro(ei_set_build_string)

 macro(ei_is_64bit_env VAR)
-
-  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/is64.cpp"
-      "int main() { return (sizeof(int*) == 8 ? 1 : 0); }
-      ")
-  try_run(run_res compile_res
-         ${CMAKE_CURRENT_BINARY_DIR} "${CMAKE_CURRENT_BINARY_DIR}/is64.cpp"
-          RUN_OUTPUT_VARIABLE run_output)
-
-  if(compile_res AND run_res)
-    set(${VAR} ${run_res})
-  elseif(CMAKE_CL_64)
-    set(${VAR} 1)
-  elseif("$ENV{Platform}" STREQUAL "X64") # nmake 64 bit
+  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(${VAR} 1)
+  elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)
+    set(${VAR} 0)
+  else()
+    message(WARNING "Unsupported pointer size. Please contact the authors.")
  endif()
 endmacro(ei_is_64bit_env)

--- a/cmake/FindCholmod.cmake
+++ b/cmake/FindCholmod.cmake
@@ -86,4 +86,4 @@ include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(CHOLMOD DEFAULT_MSG
                                  CHOLMOD_INCLUDES CHOLMOD_LIBRARIES)

-mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY)
+mark_as_advanced(CHOLMOD_INCLUDES CHOLMOD_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY CAMD_LIBRARY CCOLAMD_LIBRARY CHOLMOD_METIS_LIBRARY)
--- a/cmake/FindFFTW.cmake
+++ b/cmake/FindFFTW.cmake
@@ -115,5 +115,5 @@ include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(FFTW DEFAULT_MSG
                                  FFTW_INCLUDES FFTW_LIBRARIES)

-mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES)
+mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES FFTW_LIB FFTWF_LIB FFTWL_LIB)

--- a/cmake/FindMetis.cmake
+++ b/cmake/FindMetis.cmake
@@ -10,16 +10,50 @@ find_path(METIS_INCLUDES
  PATHS 
  $ENV{METISDIR} 
  ${INCLUDE_INSTALL_DIR} 
-  PATH_SUFFIXES 
+  PATH_SUFFIXES
+  .
  metis
  include
 )

+macro(_metis_check_version)
+  file(READ "${METIS_INCLUDES}/metis.h" _metis_version_header)
+
+  string(REGEX MATCH "define[ \t]+METIS_VER_MAJOR[ \t]+([0-9]+)" _metis_major_version_match "${_metis_version_header}")
+  set(METIS_MAJOR_VERSION "${CMAKE_MATCH_1}")
+  string(REGEX MATCH "define[ \t]+METIS_VER_MINOR[ \t]+([0-9]+)" _metis_minor_version_match "${_metis_version_header}")
+  set(METIS_MINOR_VERSION "${CMAKE_MATCH_1}")
+  string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}")
+  set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}")
+  if(NOT METIS_MAJOR_VERSION)
+    message(WARNING "Could not determine Metis version. Assuming version 4.0.0")
+    set(METIS_VERSION 4.0.0)
+  else()
+    set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION})
+  endif()
+  if(${METIS_VERSION} VERSION_LESS ${Metis_FIND_VERSION})
+    set(METIS_VERSION_OK FALSE)
+  else()
+    set(METIS_VERSION_OK TRUE)
+  endif()
+
+  if(NOT METIS_VERSION_OK)
+    message(STATUS "Metis version ${METIS_VERSION} found in ${METIS_INCLUDES}, "
+                   "but at least version ${Metis_FIND_VERSION} is required")
+  endif(NOT METIS_VERSION_OK)
+endmacro(_metis_check_version)
+
+  if(METIS_INCLUDES AND Metis_FIND_VERSION)
+    _metis_check_version()
+  else()
+    set(METIS_VERSION_OK TRUE)
+  endif()
+

 find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib)

 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(METIS DEFAULT_MSG
-                                  METIS_INCLUDES METIS_LIBRARIES)
+                                  METIS_INCLUDES METIS_LIBRARIES METIS_VERSION_OK)

 mark_as_advanced(METIS_INCLUDES METIS_LIBRARIES)
--- a/Show More
+++ b/Show More