Add CI to build docs

erm.. use proper id
update tracking code for 3.2 branch
2026-04-10 11:34:33 +08:00 · 2025-10-16 21:59:59 -07:00 · 2019-03-12 13:53:51 +01:00 · 2019-03-12 13:49:49 +01:00 · 2017-01-06 18:01:29 +01:00 · 2016-12-20 22:17:01 +01:00
218 changed files with 4750 additions and 1826 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,41 @@
+qrc_*cxx
+*.orig
+*.pyc
+*.diff
+diff
+*.save
+save
+*.old
+*.gmo
+*.qm
+core
+core.*
+*.bak
+*~
+*.build*
+*.moc.*
+*.moc
+ui_*
+CMakeCache.txt
+tags
+.*.swp
+activity.png
+*.out
+*.php*
+*.log
+*.orig
+*.rej
+log
+patch
+*.patch
+a
+a.*
+lapack/testing
+lapack/reference
+.*project
+.settings
+Makefile
+!ci/build.gitlab-ci.yml
+!scripts/buildtests.in
+!Eigen/Core
+!Eigen/src/Core
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -0,0 +1,28 @@
+# This file is part of Eigen, a lightweight C++ template library
+# for linear algebra.
+#
+# Copyright (C) 2023, The Eigen Authors
+#
+# This Source Code Form is subject to the terms of the Mozilla
+# Public License v. 2.0. If a copy of the MPL was not distributed
+# with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+stages:
+  - build
+  - deploy
+
+variables:
+  # CMake build directory.
+  EIGEN_CI_BUILDDIR: .build
+  # Specify the CMake build target.
+  EIGEN_CI_BUILD_TARGET: ""
+  # If a test regex is specified, that will be selected.
+  # Otherwise, we will try a label if specified.
+  EIGEN_CI_CTEST_REGEX: ""
+  EIGEN_CI_CTEST_LABEL: ""
+  EIGEN_CI_CTEST_ARGS: ""
+
+include:
+  - "/ci/common.gitlab-ci.yml"
+  - "/ci/build.linux.gitlab-ci.yml"
+  - "/ci/deploy.gitlab-ci.yml"
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,5 @@
 project(Eigen)
-
-cmake_minimum_required(VERSION 2.8.2)
+cmake_minimum_required(VERSION 2.8.5)

 # guard against in-source builds

@@ -55,6 +54,7 @@ endif(EIGEN_HG_CHANGESET)


 include(CheckCXXCompilerFlag)
+include(GNUInstallDirs)

 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

@@ -288,25 +288,26 @@ option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)

 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})

-# the user modifiable install path for header files
-set(EIGEN_INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "The directory where we install the header files (optional)")
-
-# set the internal install path for header files which depends on wether the user modifiable
-# EIGEN_INCLUDE_INSTALL_DIR has been set by the user or not.
-if(EIGEN_INCLUDE_INSTALL_DIR)
-  set(INCLUDE_INSTALL_DIR
-    ${EIGEN_INCLUDE_INSTALL_DIR}
-    CACHE INTERNAL
-    "The directory where we install the header files (internal)"
-  )
+# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
+if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR)
+  set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR}
+      CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed")
 else()
  set(INCLUDE_INSTALL_DIR
-    "${CMAKE_INSTALL_PREFIX}/include/eigen3"
-    CACHE INTERNAL
-    "The directory where we install the header files (internal)"
-  )
+      "${CMAKE_INSTALL_INCLUDEDIR}/eigen3"
+      CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed"
+      )
 endif()

+set(CMAKEPACKAGE_INSTALL_DIR
+    "${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
+    CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
+    )
+set(PKGCONFIG_INSTALL_DIR
+    "${CMAKE_INSTALL_DATADIR}/pkgconfig"
+    CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed"
+    )
+
 # similar to set_target_properties but append the property instead of overwriting it
 macro(ei_add_target_property target prop value)

@@ -324,21 +325,9 @@ install(FILES
  )

 if(EIGEN_BUILD_PKGCONFIG)
-    SET(path_separator ":")
-    STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
-    message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
-    FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
-    if(pkg_config_libdir)
-        SET(pkg_config_install_dir ${pkg_config_libdir})
-        message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
-    else(pkg_config_libdir)
-        SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
-        message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
-    endif(pkg_config_libdir)
-
-    configure_file(eigen3.pc.in eigen3.pc)
+    configure_file(eigen3.pc.in eigen3.pc @ONLY)
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
-        DESTINATION ${pkg_config_install_dir}/pkgconfig
+        DESTINATION ${PKGCONFIG_INSTALL_DIR}
        )
 endif(EIGEN_BUILD_PKGCONFIG)

@@ -401,12 +390,15 @@ if(cmake_generator_tolower MATCHES "makefile")
  message(STATUS "--------------+--------------------------------------------------------------")
  message(STATUS "Command       |   Description")
  message(STATUS "--------------+--------------------------------------------------------------")
-  message(STATUS "make install  | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
-  message(STATUS "              |     cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
-  message(STATUS "              |   Eigen headers will then be installed to:")
-  message(STATUS "              |     ${INCLUDE_INSTALL_DIR}")
-  message(STATUS "              |   To install Eigen headers to a separate location, do:")
-  message(STATUS "              |     cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
+  message(STATUS "make install  | Install Eigen. Headers will be installed to:")
+  message(STATUS "              |     <CMAKE_INSTALL_PREFIX>/<INCLUDE_INSTALL_DIR>")
+  message(STATUS "              |   Using the following values:")
+  message(STATUS "              |     CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
+  message(STATUS "              |     INCLUDE_INSTALL_DIR:  ${INCLUDE_INSTALL_DIR}")
+  message(STATUS "              |   Change the install location of Eigen headers using:")
+  message(STATUS "              |     cmake . -DCMAKE_INSTALL_PREFIX=yourprefix")
+  message(STATUS "              |   Or:")
+  message(STATUS "              |     cmake . -DINCLUDE_INSTALL_DIR=yourdir")
  message(STATUS "make doc      | Generate the API documentation, requires Doxygen & LaTeX")
  message(STATUS "make check    | Build and run the unit-tests. Read this page:")
  message(STATUS "              |   http://eigen.tuxfamily.org/index.php?title=Tests")
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -12,7 +12,7 @@ extern "C" {
 /** \ingroup Support_modules
  * \defgroup CholmodSupport_Module CholmodSupport module
  *
-  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  * It provides the two following main factorization classes:
  * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
  * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -123,7 +123,7 @@
    #undef bool
    #undef vector
    #undef pixel
-  #elif defined  __ARM_NEON__
+  #elif defined  __ARM_NEON
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_NEON
    #include <arm_neon.h>
--- a/Eigen/SPQRSupport
+++ b/Eigen/SPQRSupport
@@ -10,7 +10,7 @@
 /** \ingroup Support_modules
  * \defgroup SPQRSupport_Module SuiteSparseQR module
  * 
-  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  *
  * \code
  * #include <Eigen/SPQRSupport>
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -14,7 +14,7 @@
 /** 
  * \defgroup SparseCore_Module SparseCore module
  *
-  * This module provides a sparse matrix representation, and basic associatd matrix manipulations
+  * This module provides a sparse matrix representation, and basic associated matrix manipulations
  * and operations.
  *
  * See the \ref TutorialSparse "Sparse tutorial"
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -12,7 +12,7 @@ extern "C" {
 /** \ingroup Support_modules
  * \defgroup UmfPackSupport_Module UmfPackSupport module
  *
-  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  * It provides the following factorization class:
  * - class UmfPackLU: a multifrontal sequential LU factorization.
  *
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -235,6 +235,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
    }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }

    /** \internal
      * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@@ -434,6 +439,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
 template<typename MatrixType, int _UpLo>
 LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
 {
+  check_template_parameters();
+  
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();

@@ -457,7 +464,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  */
 template<typename MatrixType, int _UpLo>
 template<typename Derived>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
 {
  const Index size = w.rows();
  if (m_isInitialized)
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -174,6 +174,12 @@ template<typename _MatrixType, int _UpLo> class LLT
    LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
    /** \internal
      * Used to compute and store L
      * The strict upper part is not used and even not initialized.
@@ -283,7 +289,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
        return k;
      mat.coeffRef(k,k) = x = sqrt(x);
      if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
-      if (rs>0) A21 *= RealScalar(1)/x;
+      if (rs>0) A21 /= x;
    }
    return -1;
  }
@@ -384,6 +390,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
 template<typename MatrixType, int _UpLo>
 LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
 {
+  check_template_parameters();
+  
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
  m_matrix.resize(size, size);
--- a/Eigen/src/Cholesky/LLT_MKL.h
+++ b/Eigen/src/Cholesky/LLT_MKL.h
@@ -60,7 +60,7 @@ template<> struct mkl_llt<EIGTYPE> \
    lda = m.outerStride(); \
 \
    info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
-    info = (info==0) ? Success : NumericalIssue; \
+    info = (info==0) ? -1 : info>0 ? info-1 : size; \
    return info; \
  } \
 }; \
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -14,34 +14,40 @@ namespace Eigen {

 namespace internal {

-template<typename Scalar, typename CholmodType>
-void cholmod_configure_matrix(CholmodType& mat)
-{
-  if (internal::is_same<Scalar,float>::value)
-  {
-    mat.xtype = CHOLMOD_REAL;
-    mat.dtype = CHOLMOD_SINGLE;
-  }
-  else if (internal::is_same<Scalar,double>::value)
-  {
+template<typename Scalar> struct cholmod_configure_matrix;
+
+template<> struct cholmod_configure_matrix<double> {
+  template<typename CholmodType>
+  static void run(CholmodType& mat) {
    mat.xtype = CHOLMOD_REAL;
    mat.dtype = CHOLMOD_DOUBLE;
  }
-  else if (internal::is_same<Scalar,std::complex<float> >::value)
-  {
-    mat.xtype = CHOLMOD_COMPLEX;
-    mat.dtype = CHOLMOD_SINGLE;
-  }
-  else if (internal::is_same<Scalar,std::complex<double> >::value)
-  {
+};
+
+template<> struct cholmod_configure_matrix<std::complex<double> > {
+  template<typename CholmodType>
+  static void run(CholmodType& mat) {
    mat.xtype = CHOLMOD_COMPLEX;
    mat.dtype = CHOLMOD_DOUBLE;
  }
-  else
-  {
-    eigen_assert(false && "Scalar type not supported by CHOLMOD");
-  }
-}
+};
+
+// Other scalar types are not yet suppotred by Cholmod
+// template<> struct cholmod_configure_matrix<float> {
+//   template<typename CholmodType>
+//   static void run(CholmodType& mat) {
+//     mat.xtype = CHOLMOD_REAL;
+//     mat.dtype = CHOLMOD_SINGLE;
+//   }
+// };
+//
+// template<> struct cholmod_configure_matrix<std::complex<float> > {
+//   template<typename CholmodType>
+//   static void run(CholmodType& mat) {
+//     mat.xtype = CHOLMOD_COMPLEX;
+//     mat.dtype = CHOLMOD_SINGLE;
+//   }
+// };

 } // namespace internal

@@ -78,7 +84,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
  {
    res.itype = CHOLMOD_INT;
  }
-  else if (internal::is_same<_Index,UF_long>::value)
+  else if (internal::is_same<_Index,SuiteSparse_long>::value)
  {
    res.itype = CHOLMOD_LONG;
  }
@@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
  }

  // setup res.xtype
-  internal::cholmod_configure_matrix<_Scalar>(res);
+  internal::cholmod_configure_matrix<_Scalar>::run(res);
  
  res.stype = 0;
  
@@ -131,7 +137,7 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
  res.x      = (void*)(mat.derived().data());
  res.z      = 0;

-  internal::cholmod_configure_matrix<Scalar>(res);
+  internal::cholmod_configure_matrix<Scalar>::run(res);

  return res;
 }
@@ -172,14 +178,16 @@ class CholmodBase : internal::noncopyable
    CholmodBase()
      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
    {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
+      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
      cholmod_start(&m_cholmod);
    }

    CholmodBase(const MatrixType& matrix)
      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
    {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
+      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
      cholmod_start(&m_cholmod);
      compute(matrix);
    }
@@ -277,7 +285,7 @@ class CholmodBase : internal::noncopyable
      eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
      cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
      cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
-      
+
      // If the factorization failed, minor is the column at which it did. On success minor == n.
      this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
      m_factorizationIsOk = true;
@@ -344,7 +352,7 @@ class CholmodBase : internal::noncopyable
      */
    Derived& setShift(const RealScalar& offset)
    {
-      m_shiftOffset[0] = offset;
+      m_shiftOffset[0] = double(offset);
      return derived();
    }
    
@@ -355,7 +363,7 @@ class CholmodBase : internal::noncopyable
  protected:
    mutable cholmod_common m_cholmod;
    cholmod_factor* m_cholmodFactor;
-    RealScalar m_shiftOffset[2];
+    double m_shiftOffset[2];
    mutable ComputationInfo m_info;
    bool m_isInitialized;
    int m_factorizationIsOk;
@@ -378,6 +386,8 @@ class CholmodBase : internal::noncopyable
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
  */
 template<typename _MatrixType, int _UpLo = Lower>
@@ -395,7 +405,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
    CholmodSimplicialLLT(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      Base::compute(matrix);
    }

    ~CholmodSimplicialLLT() {}
@@ -425,6 +435,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
  */
 template<typename _MatrixType, int _UpLo = Lower>
@@ -442,7 +454,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
    CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      Base::compute(matrix);
    }

    ~CholmodSimplicialLDLT() {}
@@ -470,6 +482,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
  * \sa \ref TutorialSparseDirectSolvers
  */
 template<typename _MatrixType, int _UpLo = Lower>
@@ -487,7 +501,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
    CholmodSupernodalLLT(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      Base::compute(matrix);
    }

    ~CholmodSupernodalLLT() {}
@@ -517,6 +531,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
  * \sa \ref TutorialSparseDirectSolvers
  */
 template<typename _MatrixType, int _UpLo = Lower>
@@ -534,7 +550,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
    CholmodDecomposition(const MatrixType& matrix) : Base()
    {
      init();
-      compute(matrix);
+      Base::compute(matrix);
    }

    ~CholmodDecomposition() {}
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -124,6 +124,21 @@ class Array
    }
 #endif

+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+    Array(Array&& other)
+      : Base(std::move(other))
+    {
+      Base::_check_template_params();
+      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+        Base::_set_noalias(other);
+    }
+    Array& operator=(Array&& other)
+    {
+      other.swap(*this);
+      return *this;
+    }
+#endif
+
    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
      *
      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -46,9 +46,6 @@ template<typename Derived> class ArrayBase

    typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;

-    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
-
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
@@ -56,6 +53,7 @@ template<typename Derived> class ArrayBase
    typedef typename NumTraits<Scalar>::Real RealScalar;

    typedef DenseBase<Derived> Base;
+    using Base::operator*;
    using Base::RowsAtCompileTime;
    using Base::ColsAtCompileTime;
    using Base::SizeAtCompileTime;
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -439,19 +439,26 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Ve
  typedef typename Derived1::Index Index;
  static inline void run(Derived1 &dst, const Derived2 &src)
  {
-    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
+    typedef typename Derived1::Scalar Scalar;
+    typedef packet_traits<Scalar> PacketTraits;
    enum {
      packetSize = PacketTraits::size,
      alignable = PacketTraits::AlignedOnScalar,
-      dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
+      dstIsAligned = assign_traits<Derived1,Derived2>::DstIsAligned,
+      dstAlignment = alignable ? Aligned : int(dstIsAligned),
      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
    };
+    const Scalar *dst_ptr = &dst.coeffRef(0,0);
+    if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
+    {
+      // the pointer is not aligend-on scalar, so alignment is not possible
+      return assign_impl<Derived1,Derived2,DefaultTraversal,NoUnrolling>::run(dst, src);
+    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
-    Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
-                       : internal::first_aligned(&dst.coeffRef(0,0), innerSize);
+    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -76,9 +76,7 @@ struct CommaInitializer
  template<typename OtherDerived>
  CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
  {
-    if(other.cols()==0 || other.rows()==0)
-      return *this;
-    if (m_col==m_xpr.cols())
+    if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))
    {
      m_row+=m_currentBlockRows;
      m_col = 0;
@@ -86,24 +84,18 @@ struct CommaInitializer
      eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
        && "Too many rows passed to comma initializer (operator<<)");
    }
-    eigen_assert(m_col<m_xpr.cols()
+    eigen_assert((m_col + other.cols() <= m_xpr.cols())
      && "Too many coefficients passed to comma initializer (operator<<)");
    eigen_assert(m_currentBlockRows==other.rows());
-    if (OtherDerived::SizeAtCompileTime != Dynamic)
-      m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1,
-                              OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
-                    (m_row, m_col) = other;
-    else
-      m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other;
+    m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>
+                    (m_row, m_col, other.rows(), other.cols()) = other;
    m_col += other.cols();
    return *this;
  }

  inline ~CommaInitializer()
  {
-    eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
-         && m_col == m_xpr.cols()
-         && "Too few coefficients passed to comma initializer (operator<<)");
+      finished();
  }

  /** \returns the built matrix once all its coefficients have been set.
@@ -113,7 +105,12 @@ struct CommaInitializer
    * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
    * \endcode
    */
-  inline XprType& finished() { return m_xpr; }
+  inline XprType& finished() {
+      eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)
+           && m_col == m_xpr.cols()
+           && "Too few coefficients passed to comma initializer (operator<<)");
+      return m_xpr;
+  }

  XprType& m_xpr;   // target expression
  Index m_row;              // current row id
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -81,7 +81,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
        )
     ),
    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
-    CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
+    Cost0 = EIGEN_ADD_COST(LhsCoeffReadCost,RhsCoeffReadCost),
+    CoeffReadCost = EIGEN_ADD_COST(Cost0,functor_traits<BinaryOp>::Cost)
  };
 };
 } // end namespace internal
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -47,7 +47,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
    Flags = _XprTypeNested::Flags & (
      HereditaryBits | LinearAccessBit | AlignedBit
      | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
-    CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
+    CoeffReadCost = EIGEN_ADD_COST(_XprTypeNested::CoeffReadCost, functor_traits<UnaryOp>::Cost)
  };
 };
 }
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -38,7 +38,7 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
-    CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+    CoeffReadCost = EIGEN_ADD_COST(traits<_MatrixTypeNested>::CoeffReadCost, functor_traits<ViewOp>::Cost),
    MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,
    // need to cast the sizeof's from size_t to int explicitly, otherwise:
    // "error: no integral type can represent all of the enumerator values
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -40,15 +40,14 @@ static inline void check_DenseIndex_is_signed() {
  */
 template<typename Derived> class DenseBase
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-  : public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                                     typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
+  : public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar,
+                                            typename NumTraits<typename internal::traits<Derived>::Scalar>::Real,
+                                            DenseCoeffsBase<Derived> >
 #else
  : public DenseCoeffsBase<Derived>
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 {
  public:
-    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;

    class InnerIterator;

@@ -63,8 +62,9 @@ template<typename Derived> class DenseBase
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base;

-    typedef DenseCoeffsBase<Derived> Base;
+    using Base::operator*;
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@@ -183,10 +183,6 @@ template<typename Derived> class DenseBase
    /** \returns the number of nonzero coefficients which is in practice the number
      * of stored coefficients. */
    inline Index nonZeros() const { return size(); }
-    /** \returns true if either the number of rows or the number of columns is equal to 1.
-      * In other words, this function returns
-      * \code rows()==1 || cols()==1 \endcode
-      * \sa rows(), cols(), IsVectorAtCompileTime. */

    /** \returns the outer size.
      *
@@ -266,11 +262,13 @@ template<typename Derived> class DenseBase
    template<typename OtherDerived>
    Derived& operator=(const ReturnByValue<OtherDerived>& func);

-#ifndef EIGEN_PARSED_BY_DOXYGEN
-    /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
+    /** \internal Copies \a other into *this without evaluating other. \returns a reference to *this. */
    template<typename OtherDerived>
    Derived& lazyAssign(const DenseBase<OtherDerived>& other);
-#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+    /** \internal Evaluates \a other into *this. \returns a reference to *this. */
+    template<typename OtherDerived>
+    Derived& lazyAssign(const ReturnByValue<OtherDerived>& other);

    CommaInitializer<Derived> operator<< (const Scalar& s);

--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -122,33 +122,41 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
 {
    internal::plain_array<T,Size,_Options> m_data;
  public:
-    inline DenseStorage() {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    DenseStorage() {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()) {}
-    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
-    static inline DenseIndex rows(void) {return _Rows;}
-    static inline DenseIndex cols(void) {return _Cols;}
-    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
-    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
-    inline const T *data() const { return m_data.array; }
-    inline T *data() { return m_data.array; }
+    DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
+    DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other) m_data = other.m_data;
+      return *this;
+    }
+    DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+    static DenseIndex rows(void) {return _Rows;}
+    static DenseIndex cols(void) {return _Cols;}
+    void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+    void resize(DenseIndex,DenseIndex,DenseIndex) {}
+    const T *data() const { return m_data.array; }
+    T *data() { return m_data.array; }
 };

 // null matrix
 template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
 {
  public:
-    inline DenseStorage() {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
-    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
-    inline void swap(DenseStorage& ) {}
-    static inline DenseIndex rows(void) {return _Rows;}
-    static inline DenseIndex cols(void) {return _Cols;}
-    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
-    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
-    inline const T *data() const { return 0; }
-    inline T *data() { return 0; }
+    DenseStorage() {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+    DenseStorage(const DenseStorage&) {}
+    DenseStorage& operator=(const DenseStorage&) { return *this; }
+    DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+    void swap(DenseStorage& ) {}
+    static DenseIndex rows(void) {return _Rows;}
+    static DenseIndex cols(void) {return _Cols;}
+    void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+    void resize(DenseIndex,DenseIndex,DenseIndex) {}
+    const T *data() const { return 0; }
+    T *data() { return 0; }
 };

 // more specializations for null matrices; these are necessary to resolve ambiguities
@@ -168,18 +176,29 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic
    DenseIndex m_rows;
    DenseIndex m_cols;
  public:
-    inline DenseStorage() : m_rows(0), m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    DenseStorage() : m_rows(0), m_cols(0) {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
-    inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
-    inline void swap(DenseStorage& other)
+    DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
+    DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        m_data = other.m_data;
+        m_rows = other.m_rows;
+        m_cols = other.m_cols;
+      }
+      return *this;
+    }
+    DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
+    void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    inline DenseIndex rows() const {return m_rows;}
-    inline DenseIndex cols() const {return m_cols;}
-    inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
-    inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
-    inline const T *data() const { return m_data.array; }
-    inline T *data() { return m_data.array; }
+    DenseIndex rows() const {return m_rows;}
+    DenseIndex cols() const {return m_cols;}
+    void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
+    void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
+    const T *data() const { return m_data.array; }
+    T *data() { return m_data.array; }
 };

 // dynamic-size matrix with fixed-size storage and fixed width
@@ -188,17 +207,27 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_rows;
  public:
-    inline DenseStorage() : m_rows(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    DenseStorage() : m_rows(0) {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
-    inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
-    inline DenseIndex rows(void) const {return m_rows;}
-    inline DenseIndex cols(void) const {return _Cols;}
-    inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
-    inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
-    inline const T *data() const { return m_data.array; }
-    inline T *data() { return m_data.array; }
+    DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
+    DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        m_data = other.m_data;
+        m_rows = other.m_rows;
+      }
+      return *this;
+    }
+    DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
+    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    DenseIndex rows(void) const {return m_rows;}
+    DenseIndex cols(void) const {return _Cols;}
+    void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
+    void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
+    const T *data() const { return m_data.array; }
+    T *data() { return m_data.array; }
 };

 // dynamic-size matrix with fixed-size storage and fixed height
@@ -207,17 +236,27 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
    internal::plain_array<T,Size,_Options> m_data;
    DenseIndex m_cols;
  public:
-    inline DenseStorage() : m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    DenseStorage() : m_cols(0) {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
-    inline DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
-    inline DenseIndex rows(void) const {return _Rows;}
-    inline DenseIndex cols(void) const {return m_cols;}
-    inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
-    inline void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
-    inline const T *data() const { return m_data.array; }
-    inline T *data() { return m_data.array; }
+    DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
+    DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        m_data = other.m_data;
+        m_cols = other.m_cols;
+      }
+      return *this;
+    }
+    DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
+    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    DenseIndex rows(void) const {return _Rows;}
+    DenseIndex cols(void) const {return m_cols;}
+    void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
+    void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
+    const T *data() const { return m_data.array; }
+    T *data() { return m_data.array; }
 };

 // purely dynamic matrix.
@@ -227,18 +266,35 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
    DenseIndex m_rows;
    DenseIndex m_cols;
  public:
-    inline DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
+    DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert)
       : m_data(0), m_rows(0), m_cols(0) {}
-    inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+    DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
-    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
-    inline void swap(DenseStorage& other)
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+    DenseStorage(DenseStorage&& other)
+      : m_data(std::move(other.m_data))
+      , m_rows(std::move(other.m_rows))
+      , m_cols(std::move(other.m_cols))
+    {
+      other.m_data = nullptr;
+    }
+    DenseStorage& operator=(DenseStorage&& other)
+    {
+      using std::swap;
+      swap(m_data, other.m_data);
+      swap(m_rows, other.m_rows);
+      swap(m_cols, other.m_cols);
+      return *this;
+    }
+#endif
+    ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+    void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    inline DenseIndex rows(void) const {return m_rows;}
-    inline DenseIndex cols(void) const {return m_cols;}
-    inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+    DenseIndex rows(void) const {return m_rows;}
+    DenseIndex cols(void) const {return m_cols;}
+    void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
      m_rows = nbRows;
@@ -258,8 +314,11 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
      m_rows = nbRows;
      m_cols = nbCols;
    }
-    inline const T *data() const { return m_data; }
-    inline T *data() { return m_data; }
+    const T *data() const { return m_data; }
+    T *data() { return m_data; }
+  private:
+    DenseStorage(const DenseStorage&);
+    DenseStorage& operator=(const DenseStorage&);
 };

 // matrix with dynamic width and fixed height (so that matrix has dynamic size).
@@ -268,15 +327,30 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
    T *m_data;
    DenseIndex m_cols;
  public:
-    inline DenseStorage() : m_data(0), m_cols(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
-    inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
+    DenseStorage() : m_data(0), m_cols(0) {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+    DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
-    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
-    static inline DenseIndex rows(void) {return _Rows;}
-    inline DenseIndex cols(void) const {return m_cols;}
-    inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+    DenseStorage(DenseStorage&& other)
+      : m_data(std::move(other.m_data))
+      , m_cols(std::move(other.m_cols))
+    {
+      other.m_data = nullptr;
+    }
+    DenseStorage& operator=(DenseStorage&& other)
+    {
+      using std::swap;
+      swap(m_data, other.m_data);
+      swap(m_cols, other.m_cols);
+      return *this;
+    }
+#endif
+    ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    static DenseIndex rows(void) {return _Rows;}
+    DenseIndex cols(void) const {return m_cols;}
+    void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
      m_cols = nbCols;
@@ -294,8 +368,11 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
      }
      m_cols = nbCols;
    }
-    inline const T *data() const { return m_data; }
-    inline T *data() { return m_data; }
+    const T *data() const { return m_data; }
+    T *data() { return m_data; }
+  private:
+    DenseStorage(const DenseStorage&);
+    DenseStorage& operator=(const DenseStorage&);
 };

 // matrix with dynamic height and fixed width (so that matrix has dynamic size).
@@ -304,15 +381,30 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
    T *m_data;
    DenseIndex m_rows;
  public:
-    inline DenseStorage() : m_data(0), m_rows(0) {}
-    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
-    inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
+    DenseStorage() : m_data(0), m_rows(0) {}
+    DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+    DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
-    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
-    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
-    inline DenseIndex rows(void) const {return m_rows;}
-    static inline DenseIndex cols(void) {return _Cols;}
-    inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+    DenseStorage(DenseStorage&& other)
+      : m_data(std::move(other.m_data))
+      , m_rows(std::move(other.m_rows))
+    {
+      other.m_data = nullptr;
+    }
+    DenseStorage& operator=(DenseStorage&& other)
+    {
+      using std::swap;
+      swap(m_data, other.m_data);
+      swap(m_rows, other.m_rows);
+      return *this;
+    }
+#endif
+    ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    DenseIndex rows(void) const {return m_rows;}
+    static DenseIndex cols(void) {return _Cols;}
+    void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
      m_rows = nbRows;
@@ -330,8 +422,11 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
      }
      m_rows = nbRows;
    }
-    inline const T *data() const { return m_data; }
-    inline T *data() { return m_data; }
+    const T *data() const { return m_data; }
+    T *data() { return m_data; }
+  private:
+    DenseStorage(const DenseStorage&);
+    DenseStorage& operator=(const DenseStorage&);
 };

 } // end namespace Eigen
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -44,10 +44,10 @@ class DiagonalBase : public EigenBase<Derived>
    template<typename DenseDerived>
    void evalTo(MatrixBase<DenseDerived> &other) const;
    template<typename DenseDerived>
-    void addTo(MatrixBase<DenseDerived> &other) const
+    inline void addTo(MatrixBase<DenseDerived> &other) const
    { other.diagonal() += diagonal(); }
    template<typename DenseDerived>
-    void subTo(MatrixBase<DenseDerived> &other) const
+    inline void subTo(MatrixBase<DenseDerived> &other) const
    { other.diagonal() -= diagonal(); }

    inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
@@ -98,7 +98,7 @@ class DiagonalBase : public EigenBase<Derived>

 template<typename Derived>
 template<typename DenseDerived>
-void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
+inline void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
 {
  other.setZero();
  other.diagonal() = diagonal();
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -34,8 +34,9 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
    _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
    _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,

-    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
-    CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
+    Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
+    Cost0 = EIGEN_ADD_COST(NumTraits<Scalar>::MulCost, MatrixType::CoeffReadCost),
+    CoeffReadCost = EIGEN_ADD_COST(Cost0,DiagonalType::DiagonalVectorType::CoeffReadCost)
  };
 };
 }
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -59,7 +59,7 @@ struct dot_nocheck<T, U, true>
  */
 template<typename Derived>
 template<typename OtherDerived>
-typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+inline typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -259,6 +259,47 @@ template<> struct functor_traits<scalar_boolean_or_op> {
  };
 };

+/** \internal
+  * \brief Template functors for comparison of two scalars
+  * \todo Implement packet-comparisons
+  */
+template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op;
+
+template<typename Scalar, ComparisonName cmp>
+struct functor_traits<scalar_cmp_op<Scalar, cmp> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = false
+  };
+};
+
+template<ComparisonName Cmp, typename Scalar>
+struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> {
+  typedef bool type;
+};
+
+
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;}
+};
+
 // unary functors:

 /** \internal
@@ -928,6 +969,8 @@ template<typename T>
 struct functor_traits<std::not_equal_to<T> >
 { enum { Cost = 1, PacketAccess = false }; };

+#if(__cplusplus < 201103L)
+// std::binder* are deprecated since c++11 and will be removed in c++17
 template<typename T>
 struct functor_traits<std::binder2nd<T> >
 { enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
@@ -935,6 +978,7 @@ struct functor_traits<std::binder2nd<T> >
 template<typename T>
 struct functor_traits<std::binder1st<T> >
 { enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+#endif

 template<typename T>
 struct functor_traits<std::unary_negate<T> >
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -205,9 +205,6 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
  public:
    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
    }

@@ -257,15 +254,13 @@ template<typename Lhs, typename Rhs>
 class GeneralProduct<Lhs, Rhs, OuterProduct>
  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
 {
-    template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+    template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
    
  public:
    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)

    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    }
    
    struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
@@ -281,22 +276,22 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
    
    template<typename Dest>
    inline void evalTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
+      internal::outer_product_selector_run(*this, dest, set(), is_row_major<Dest>());
    }
    
    template<typename Dest>
    inline void addTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
+      internal::outer_product_selector_run(*this, dest, add(), is_row_major<Dest>());
    }

    template<typename Dest>
    inline void subTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
+      internal::outer_product_selector_run(*this, dest, sub(), is_row_major<Dest>());
    }

    template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
    {
-      internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
+      internal::outer_product_selector_run(*this, dest, adds(alpha), is_row_major<Dest>());
    }
 };

@@ -425,15 +420,18 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());

+    // make sure Dest is a compile-time vector type (bug 1166)
+    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
+
    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
-      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
-      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+      MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
    };

-    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+    gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;

    bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
@@ -522,7 +520,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
        actualLhs.rows(), actualLhs.cols(),
        actualLhs.data(), actualLhs.outerStride(),
        actualRhsPtr, 1,
-        dest.data(), dest.innerStride(),
+        dest.data(), dest.col(0).innerStride(), //NOTE  if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
        actualAlpha);
  }
 };
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -183,8 +183,8 @@ template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const
 /** \internal tries to do cache prefetching of \a addr */
 template<typename Scalar> inline void prefetch(const Scalar* addr)
 {
-#if !defined(_MSC_VER)
-__builtin_prefetch(addr);
+#if (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
+  __builtin_prefetch(addr);
 #endif
 }

--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -123,7 +123,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
    }

-    inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+    explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
    {
      EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
      checkSanity();
@@ -149,6 +149,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      checkSanity();
    }

+    #ifdef EIGEN_MAPBASE_PLUGIN
+    #include EIGEN_MAPBASE_PLUGIN
+    #endif
+
  protected:

    void checkSanity() const
@@ -157,7 +161,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
                                        internal::inner_stride_at_compile_time<Derived>::ret==1),
                          PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
-                   && "data is not aligned");
+                   && "input pointer is not aligned on a 16 byte boundary");
    }

    PointerType m_data;
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -218,8 +218,8 @@ struct conj_retval
 * Implementation of abs2                                                 *
 ****************************************************************************/

-template<typename Scalar>
-struct abs2_impl
+template<typename Scalar,bool IsComplex>
+struct abs2_impl_default
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x)
@@ -228,15 +228,26 @@ struct abs2_impl
  }
 };

-template<typename RealScalar>
-struct abs2_impl<std::complex<RealScalar> >
+template<typename Scalar>
+struct abs2_impl_default<Scalar, true> // IsComplex
 {
-  static inline RealScalar run(const std::complex<RealScalar>& x)
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static inline RealScalar run(const Scalar& x)
  {
    return real(x)*real(x) + imag(x)*imag(x);
  }
 };

+template<typename Scalar>
+struct abs2_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static inline RealScalar run(const Scalar& x)
+  {
+    return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);
+  }
+};
+
 template<typename Scalar>
 struct abs2_retval
 {
@@ -294,7 +305,7 @@ struct hypot_impl
    RealScalar _x = abs(x);
    RealScalar _y = abs(y);
    RealScalar p = (max)(_x, _y);
-    if(p==RealScalar(0)) return 0;
+    if(p==RealScalar(0)) return RealScalar(0);
    RealScalar q = (min)(_x, _y);
    RealScalar qp = q/p;
    return p * sqrt(RealScalar(1) + qp*qp);
@@ -496,11 +507,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus>
 template<typename Scalar>
 struct random_default_impl<Scalar, false, true>
 {
-  typedef typename NumTraits<Scalar>::NonInteger NonInteger;
-
  static inline Scalar run(const Scalar& x, const Scalar& y)
  {
-    return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
+    typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
+    if(y<x)
+      return x;
+    // the following difference might overflow on a 32 bits system,
+    // but since y>=x the result converted to an unsigned long is still correct.
+    std::size_t range = ScalarX(y)-ScalarX(x);
+    std::size_t offset = 0;
+    // rejection sampling
+    std::size_t divisor = 1;
+    std::size_t multiplier = 1;
+    if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
+    else               multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
+    do {
+      offset = (std::size_t(std::rand()) * multiplier) / divisor;
+    } while (offset > range);
+    return Scalar(ScalarX(x) + offset);
  }

  static inline Scalar run()
@@ -707,21 +731,21 @@ struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::

 template<typename Scalar, typename OtherScalar>
 inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
-                                   typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+                              const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
 {
  return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
 }

 template<typename Scalar>
 inline bool isApprox(const Scalar& x, const Scalar& y,
-                          typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+                     const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
 {
  return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
 }

 template<typename Scalar>
 inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
-                                    typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+                               const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
 {
  return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
 }
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -211,6 +211,21 @@ class Matrix
      : Base(internal::constructor_without_unaligned_array_assert())
    { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }

+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+    Matrix(Matrix&& other)
+      : Base(std::move(other))
+    {
+      Base::_check_template_params();
+      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
+        Base::_set_noalias(other);
+    }
+    Matrix& operator=(Matrix&& other)
+    {
+      other.swap(*this);
+      return *this;
+    }
+#endif
+
    /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
      *
      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -159,13 +159,11 @@ template<typename Derived> class MatrixBase
    template<typename OtherDerived>
    Derived& operator=(const ReturnByValue<OtherDerived>& other);

-#ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename ProductDerived, typename Lhs, typename Rhs>
    Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);

    template<typename MatrixPower, typename Lhs, typename Rhs>
    Derived& lazyAssign(const MatrixPowerProduct<MatrixPower, Lhs,Rhs>& other);
-#endif // not EIGEN_PARSED_BY_DOXYGEN

    template<typename OtherDerived>
    Derived& operator+=(const MatrixBase<OtherDerived>& other);
@@ -442,6 +440,15 @@ template<typename Derived> class MatrixBase
    template<typename OtherScalar>
    void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);

+///////// SparseCore module /////////
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
+    cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
+    {
+      return other.cwiseProduct(derived());
+    }
+
 ///////// MatrixFunctions module /////////

    typedef typename internal::stem_function<Scalar>::type StemFunction;
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -250,6 +250,35 @@ class PermutationBase : public EigenBase<Derived>
    template<typename Other> friend
    inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
    { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
+    
+    /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
+      *
+      * This function is O(\c n) procedure allocating a buffer of \c n booleans.
+      */
+    Index determinant() const
+    {
+      Index res = 1;
+      Index n = size();
+      Matrix<bool,RowsAtCompileTime,1,0,MaxRowsAtCompileTime> mask(n);
+      mask.fill(false);
+      Index r = 0;
+      while(r < n)
+      {
+        // search for the next seed
+        while(r<n && mask[r]) r++;
+        if(r>=n)
+          break;
+        // we got one, let's follow it until we are back to the seed
+        Index k0 = r++;
+        mask.coeffRef(k0) = true;
+        for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k))
+        {
+          mask.coeffRef(k) = true;
+          res = -res;
+        }
+      }
+      return res;
+    }

  protected:

@@ -555,10 +584,11 @@ struct permut_matrix_product_retval
      const Index n = Side==OnTheLeft ? rows() : cols();
      // FIXME we need an is_same for expression that is not sensitive to constness. For instance
      // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
+      const typename Dest::Scalar *dst_data = internal::extract_data(dst);
      if(    is_same<MatrixTypeNestedCleaned,Dest>::value
          && blas_traits<MatrixTypeNestedCleaned>::HasUsableDirectAccess
          && blas_traits<Dest>::HasUsableDirectAccess
-          && extract_data(dst) == extract_data(m_matrix))
+          && dst_data!=0 && dst_data == extract_data(m_matrix))
      {
        // apply the permutation inplace
        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -315,8 +315,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
    {
      const OtherDerived& other = _other.derived();
-      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
-      const Index othersize = other.rows()*other.cols();
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(Index(other.rows()), Index(other.cols()));
+      const Index othersize = Index(other.rows())*Index(other.cols());
      if(RowsAtCompileTime == 1)
      {
        eigen_assert(other.rows() == 1 || other.cols() == 1);
@@ -437,6 +437,36 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    }
 #endif

+#ifdef EIGEN_HAVE_RVALUE_REFERENCES
+    PlainObjectBase(PlainObjectBase&& other)
+      : m_storage( std::move(other.m_storage) )
+    {
+    }
+
+    PlainObjectBase& operator=(PlainObjectBase&& other)
+    {
+      using std::swap;
+      swap(m_storage, other.m_storage);
+      return *this;
+    }
+#endif
+
+    /** Copy constructor */
+    EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
+      : m_storage()
+    {
+      _check_template_params();
+      lazyAssign(other);
+    }
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
+      : m_storage()
+    {
+      _check_template_params();
+      lazyAssign(other);
+    }
+
    EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
      : m_storage(a_size, nbRows, nbCols)
    {
@@ -457,7 +487,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
-      : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+      : m_storage(Index(other.derived().rows()) * Index(other.derived().cols()), other.derived().rows(), other.derived().cols())
    {
      _check_template_params();
      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.derived().rows(), other.derived().cols());
@@ -573,6 +603,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
                 : (rows() == other.rows() && cols() == other.cols())))
        && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
      EIGEN_ONLY_USED_FOR_DEBUG(other);
+      if(this->size()==0)
+        resizeLike(other);
      #else
      resizeLike(other);
      #endif
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -247,8 +247,9 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
  }
 };

-template<typename Func, typename Derived>
-struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
+// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
+template<typename Func, typename Derived, int Unrolling>
+struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
 {
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -108,7 +108,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
      OuterStrideMatch = Derived::IsVectorAtCompileTime
                      || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
      AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
-      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
+      ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
+      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
    };
    typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
  };
@@ -187,9 +188,11 @@ protected:
 template<typename PlainObjectType, int Options, typename StrideType> class Ref
  : public RefBase<Ref<PlainObjectType, Options, StrideType> >
 {
+  private:
    typedef internal::traits<Ref> Traits;
    template<typename Derived>
-    inline Ref(const PlainObjectBase<Derived>& expr);
+    inline Ref(const PlainObjectBase<Derived>& expr,
+               typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
  public:

    typedef RefBase<Ref> Base;
@@ -198,13 +201,15 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename Derived>
-    inline Ref(PlainObjectBase<Derived>& expr)
+    inline Ref(PlainObjectBase<Derived>& expr,
+               typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
    {
      EIGEN_STATIC_ASSERT(static_cast<bool>(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
      Base::construct(expr.derived());
    }
    template<typename Derived>
-    inline Ref(const DenseBase<Derived>& expr)
+    inline Ref(const DenseBase<Derived>& expr,
+               typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
    #else
    template<typename Derived>
    inline Ref(DenseBase<Derived>& expr)
@@ -231,13 +236,23 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)

    template<typename Derived>
-    inline Ref(const DenseBase<Derived>& expr)
+    inline Ref(const DenseBase<Derived>& expr,
+               typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
    {
 //      std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
 //      std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
 //      std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n";
      construct(expr.derived(), typename Traits::template match<Derived>::type());
    }
+    
+    inline Ref(const Ref& other) : Base(other) {
+      // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
+    }
+
+    template<typename OtherRef>
+    inline Ref(const RefBase<OtherRef>& other) {
+      construct(other.derived(), typename Traits::template match<OtherRef>::type());
+    }

  protected:

--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -72,6 +72,8 @@ template<typename Derived> class ReturnByValue
    const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
    Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
    Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
+    template<int LoadMode>  Unusable& packet(Index) const;
+    template<int LoadMode>  Unusable& packet(Index, Index) const;
 #endif
 };

@@ -83,6 +85,15 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
  return derived();
 }

+template<typename Derived>
+template<typename OtherDerived>
+Derived& DenseBase<Derived>::lazyAssign(const ReturnByValue<OtherDerived>& other)
+{
+  other.evalTo(derived());
+  return derived();
+}
+
+
 } // end namespace Eigen

 #endif // EIGEN_RETURNBYVALUE_H
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -76,9 +76,23 @@ template<typename MatrixType, int Direction> class Reverse
    EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
    using Base::IsRowMajor;

-    // next line is necessary because otherwise const version of operator()
-    // is hidden by non-const version defined in this file
-    using Base::operator(); 
+    // The following two operators are provided to worarkound
+    // a MSVC 2013 issue. In theory, we could simply do:
+    //   using Base::operator(); 
+    // to make const version of operator() visible.
+    // Otheriwse, they would be hidden by the non-const versions defined in this file
+    
+    inline CoeffReturnType operator()(Index row, Index col) const
+    {
+      eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+      return coeff(row, col);
+    }
+
+    inline CoeffReturnType operator()(Index index) const
+    {
+      eigen_assert(index >= 0 && index < m_matrix.size());
+      return coeff(index);
+    }

  protected:
    enum {
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -180,15 +180,9 @@ inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 template<typename Derived>
 inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
-  typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
-                                        internal::scalar_quotient_op<Scalar>,
-                                        internal::scalar_product_op<Scalar> >::type BinOp;
  typedef typename Derived::PlainObject PlainObject;
-  SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
-  Scalar actual_other;
-  if(NumTraits<Scalar>::IsInteger)  actual_other = other;
-  else                              actual_other = Scalar(1)/other;
-  tmp = PlainObject::Constant(rows(),cols(), actual_other);
+  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+  tmp = PlainObject::Constant(rows(),cols(), other);
  return derived();
 }

--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -116,17 +116,17 @@ template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
 struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
  enum {
    IsLower = ((Mode&Lower)==Lower),
-    I = IsLower ? Index : Size - Index - 1,
-    S = IsLower ? 0     : I+1
+    RowIndex = IsLower ? Index : Size - Index - 1,
+    S = IsLower ? 0     : RowIndex+1
  };
  static void run(const Lhs& lhs, Rhs& rhs)
  {
    if (Index>0)
-      rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose()
+      rhs.coeffRef(RowIndex) -= lhs.row(RowIndex).template segment<Index>(S).transpose()
                         .cwiseProduct(rhs.template segment<Index>(S)).sum();

    if(!(Mode & UnitDiag))
-      rhs.coeffRef(I) /= lhs.coeff(I,I);
+      rhs.coeffRef(RowIndex) /= lhs.coeff(RowIndex,RowIndex);

    triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs);
  }
@@ -243,7 +243,8 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv

  template<typename Dest> inline void evalTo(Dest& dst) const
  {
-    if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs)))
+    const typename Dest::Scalar *dst_data = internal::extract_data(dst);
+    if(!(is_same<RhsNestedCleaned,Dest>::value && dst_data!=0 && extract_data(dst) == extract_data(m_rhs)))
      dst = m_rhs;
    m_triangularMatrix.template solveInPlace<Side>(dst);
  }
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -331,11 +331,11 @@ inline void MatrixBase<Derived>::adjointInPlace()

 namespace internal {

-template<typename BinOp,typename NestedXpr,typename Rhs>
-struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
- : blas_traits<NestedXpr>
+template<typename BinOp,typename Xpr,typename Rhs>
+struct blas_traits<SelfCwiseBinaryOp<BinOp,Xpr,Rhs> >
+ : blas_traits<typename internal::remove_all<typename Xpr::Nested>::type>
 {
-  typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
+  typedef SelfCwiseBinaryOp<BinOp,Xpr,Rhs> XprType;
  static inline const XprType extract(const XprType& x) { return x; }
 };

@@ -392,7 +392,6 @@ struct checkTransposeAliasing_impl
                      ::run(extract_data(dst), other))
          && "aliasing detected during transposition, use transposeInPlace() "
             "or evaluate the rhs into a temporary using .eval()");
-
    }
 };

--- a/Eigen/src/Core/Transpositions.h
+++ b/Eigen/src/Core/Transpositions.h
@@ -376,7 +376,8 @@ struct transposition_matrix_product_retval
      const int size = m_transpositions.size();
      Index j = 0;

-      if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
+      const typename Dest::Scalar *dst_data = internal::extract_data(dst);
+      if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && dst_data!=0 && dst_data == extract_data(m_matrix)))
        dst = m_matrix;

      for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -76,14 +76,17 @@ template<typename Derived>
 template<typename Visitor>
 void DenseBase<Derived>::visit(Visitor& visitor) const
 {
+  typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
+  typename Derived::Nested thisNested(derived());
+
  enum { unroll = SizeAtCompileTime != Dynamic
                   && CoeffReadCost != Dynamic
                   && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
                   && SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
                      <= EIGEN_UNROLLING_LIMIT };
-  return internal::visitor_impl<Visitor, Derived,
+  return internal::visitor_impl<Visitor, ThisNested,
      unroll ? int(SizeAtCompileTime) : Dynamic
-    >::run(derived(), visitor);
+    >::run(thisNested, visitor);
 }

 namespace internal {
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -384,6 +384,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
  a_lo = vget_low_s32(a);
  a_hi = vget_high_s32(a);
  max = vpmax_s32(a_lo, a_hi);
+  max = vpmax_s32(max, max);

  return vget_lane_s32(max, 0);
 }
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -126,7 +126,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);

-  Packet4f tmp = _mm_setzero_ps(), fx;
+  Packet4f tmp, fx;
  Packet4i emm0;

  // clamp x
@@ -195,7 +195,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
  static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);

-  Packet2d tmp = _mm_setzero_pd(), fx;
+  Packet2d tmp, fx;
  Packet4i emm0;

  // clamp x
@@ -279,7 +279,7 @@ Packet4f psin<Packet4f>(const Packet4f& _x)
  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI

-  Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
+  Packet4f xmm1, xmm2, xmm3, sign_bit, y;

  Packet4i emm0, emm2;
  sign_bit = x;
@@ -378,7 +378,7 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI

-  Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
+  Packet4f xmm1, xmm2, xmm3, y;
  Packet4i emm0, emm2;

  x = pabs(x);
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -235,63 +235,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from) { E
    return _mm_loadu_ps(from);
    #endif
  }
-  template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
-  template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int*    from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
 #else
-// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
-// require pointer casting to incompatible pointer types and leads to invalid code
-// because of the strict aliasing rule. The "dummy" stuff are required to enforce
-// a correct instruction dependency.
-// TODO: do the same for MSVC (ICC is compatible)
 // NOTE: with the code below, MSVC's compiler crashes!

-#if defined(__GNUC__) && defined(__i386__)
-  // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
-  #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
-#elif defined(__clang__)
-  // bug 201: Segfaults in __mm_loadh_pd with clang 2.8
-  #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
-#else
-  #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
-#endif
-
 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
 {
  EIGEN_DEBUG_UNALIGNED_LOAD
-#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
  return _mm_loadu_ps(from);
-#else
-  __m128d res;
-  res =  _mm_load_sd((const double*)(from)) ;
-  res =  _mm_loadh_pd(res, (const double*)(from+2)) ;
-  return _mm_castpd_ps(res);
-#endif
 }
+#endif
+
 template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
 {
  EIGEN_DEBUG_UNALIGNED_LOAD
-#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
  return _mm_loadu_pd(from);
-#else
-  __m128d res;
-  res = _mm_load_sd(from) ;
-  res = _mm_loadh_pd(res,from+1);
-  return res;
-#endif
 }
 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
 {
  EIGEN_DEBUG_UNALIGNED_LOAD
-#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS
-  return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from));
-#else
-  __m128d res;
-  res =  _mm_load_sd((const double*)(from)) ;
-  res =  _mm_loadh_pd(res, (const double*)(from+2)) ;
-  return _mm_castpd_si128(res);
-#endif
+  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
 }
-#endif
+

 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)
 {
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -134,7 +134,7 @@ class CoeffBasedProduct
    };

    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
-                                   Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
+                                   Unroll ? InnerSize : Dynamic,
                                   _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;

    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
@@ -185,7 +185,7 @@ class CoeffBasedProduct
    {
      PacketScalar res;
      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
-                              Unroll ? (InnerSize==0 ? 0 : InnerSize-1) : Dynamic,
+                              Unroll ? InnerSize : Dynamic,
                              _LhsNested, _RhsNested, PacketScalar, LoadMode>
        ::run(row, col, m_lhs, m_rhs, res);
      return res;
@@ -243,7 +243,17 @@ struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
-    res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
+    res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar>
+{
+  typedef typename Lhs::Index Index;
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  {
+    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
  }
 };

@@ -251,9 +261,9 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
  {
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+    res = RetScalar(0);
  }
 };

@@ -293,6 +303,16 @@ struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
  }
 };

+template<typename Lhs, typename Rhs, typename RetScalar>
+struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar>
+{
+  typedef typename Lhs::Index Index;
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
+  {
+    res = 0;
+  }
+};
+
 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
 {
@@ -302,8 +322,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    Packet pres;
-    product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
-    product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
+    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    res = predux(pres);
  }
 };
@@ -371,7 +390,7 @@ struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
-    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
+    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
  }
 };

@@ -382,12 +401,12 @@ struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
-    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
+    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
  }
 };

 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
@@ -397,7 +416,7 @@ struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
 };

 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
@@ -406,16 +425,35 @@ struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
  }
 };

+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+  typedef typename Lhs::Index Index;
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
+  {
+    res = pset1<Packet>(0);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+  typedef typename Lhs::Index Index;
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
+  {
+    res = pset1<Packet>(0);
+  }
+};
+
 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
-    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
-    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
-      for(Index i = 1; i < lhs.cols(); ++i)
-        res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
+    res = pset1<Packet>(0);
+    for(Index i = 0; i < lhs.cols(); ++i)
+      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
  }
 };

@@ -425,10 +463,9 @@ struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
-    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
-      for(Index i = 1; i < lhs.cols(); ++i)
-        res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+    res = pset1<Packet>(0);
+    for(Index i = 0; i < lhs.cols(); ++i)
+      res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
  }
 };

--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -140,8 +140,10 @@ static void run(Index rows, Index cols, Index depth,
      // Release all the sub blocks B'_j of B' for the current thread,
      // i.e., we simply decrement the number of users by 1
      for(Index j=0; j<threads; ++j)
+      {
        #pragma omp atomic
-        --(info[j].users);
+        info[j].users -= 1;
+      }
    }
  }
  else
@@ -390,13 +392,17 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>

    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
    {
+#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
      typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
      EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
+#endif
    }

    template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
    {
      eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+      if(m_lhs.cols()==0 || m_lhs.rows()==0 || m_rhs.cols()==0)
+        return;

      typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
      typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -125,19 +125,22 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
  if(transpose)
    std::swap(rows,cols);

-  Index blockCols = (cols / threads) & ~Index(0x3);
-  Index blockRows = (rows / threads) & ~Index(0x7);
-  
  GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];

-  #pragma omp parallel for schedule(static,1) num_threads(threads)
-  for(Index i=0; i<threads; ++i)
+  #pragma omp parallel num_threads(threads)
  {
+    Index i = omp_get_thread_num();
+    // Note that the actual number of threads might be lower than the number of request ones.
+    Index actual_threads = omp_get_num_threads();
+    
+    Index blockCols = (cols / actual_threads) & ~Index(0x3);
+    Index blockRows = (rows / actual_threads) & ~Index(0x7);
+    
    Index r0 = i*blockRows;
-    Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
+    Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;

    Index c0 = i*blockCols;
-    Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
+    Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;

    info[i].rhs_start = c0;
    info[i].rhs_length = actualBlockCols;
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -109,7 +109,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
 /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
   if (rows != depth) { \
 \
-     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+     int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
 \
     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
@@ -223,7 +223,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
 /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
   if (cols != depth) { \
 \
-     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+     int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
 \
     if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -81,7 +81,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
    // coherence when accessing the rhs elements
    std::ptrdiff_t l1, l2;
    manage_caching_sizes(GetAction, &l1, &l2);
-    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
+    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) *  std::max<Index>(otherStride,size)) : 0;
    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);

    for(Index k2=IsLower ? 0 : size;
@@ -115,8 +115,9 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
          {
            // TODO write a small kernel handling this (can be shared with trsv)
            Index i  = IsLower ? k2+k1+k : k2-k1-k-1;
-            Index s  = IsLower ? k2+k1 : i+1;
            Index rs = actualPanelWidth - k - 1; // remaining size
+            Index s  = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
+                                                 :  IsLower ? i+1 : i-rs;

            Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
            for (Index j=j2; j<j2+actual_cols; ++j)
@@ -133,7 +134,6 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
              }
              else
              {
-                Index s = IsLower ? i+1 : i-rs;
                Scalar b = (other(i,j) *= a);
                Scalar* r = &other(s,j);
                const Scalar* l = &tri(s,i);
@@ -302,9 +302,12 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
                for (Index i=0; i<actual_mc; ++i)
                  r[i] -= a[i] * b;
              }
-              Scalar b = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(rhs(j,j));
-              for (Index i=0; i<actual_mc; ++i)
-                r[i] *= b;
+              if((Mode & UnitDiag)==0)
+              {
+                Scalar b = conj(rhs(j,j));
+                for (Index i=0; i<actual_mc; ++i)
+                  r[i] /= b;
+              }
            }

            // pack the just computed part of lhs to A
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -42,16 +42,29 @@ template<bool Conjugate> struct conj_if;

 template<> struct conj_if<true> {
  template<typename T>
-  inline T operator()(const T& x) { return numext::conj(x); }
+  inline T operator()(const T& x) const { return numext::conj(x); }
  template<typename T>
-  inline T pconj(const T& x) { return internal::pconj(x); }
+  inline T pconj(const T& x) const { return internal::pconj(x); }
 };

 template<> struct conj_if<false> {
  template<typename T>
-  inline const T& operator()(const T& x) { return x; }
+  inline const T& operator()(const T& x) const { return x; }
  template<typename T>
-  inline const T& pconj(const T& x) { return x; }
+  inline const T& pconj(const T& x) const { return x; }
+};
+
+// Generic implementation for custom complex types.
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
+struct conj_helper
+{
+  typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType Scalar;
+
+  EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
+  { return padd(c, pmul(x,y)); }
+
+  EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
+  { return conj_if<ConjLhs>()(x) *  conj_if<ConjRhs>()(y); }
 };

 template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
@@ -171,12 +184,13 @@ template<typename XprType> struct blas_traits
 };

 // pop conjugate
-template<typename Scalar, typename NestedXpr>
-struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
- : blas_traits<NestedXpr>
+template<typename Scalar, typename Xpr>
+struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, Xpr> >
+ : blas_traits<typename internal::remove_all<typename Xpr::Nested>::type>
 {
+  typedef typename internal::remove_all<typename Xpr::Nested>::type NestedXpr;
  typedef blas_traits<NestedXpr> Base;
-  typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
+  typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, Xpr> XprType;
  typedef typename Base::ExtractType ExtractType;

  enum {
@@ -188,12 +202,13 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
 };

 // pop scalar multiple
-template<typename Scalar, typename NestedXpr>
-struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
- : blas_traits<NestedXpr>
+template<typename Scalar, typename Xpr>
+struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, Xpr> >
+ : blas_traits<typename internal::remove_all<typename Xpr::Nested>::type>
 {
+  typedef typename internal::remove_all<typename Xpr::Nested>::type NestedXpr;
  typedef blas_traits<NestedXpr> Base;
-  typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
+  typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, Xpr> XprType;
  typedef typename Base::ExtractType ExtractType;
  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x)
@@ -201,12 +216,13 @@ struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
 };

 // pop opposite
-template<typename Scalar, typename NestedXpr>
-struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
- : blas_traits<NestedXpr>
+template<typename Scalar, typename Xpr>
+struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, Xpr> >
+ : blas_traits<typename internal::remove_all<typename Xpr::Nested>::type>
 {
+  typedef typename internal::remove_all<typename Xpr::Nested>::type NestedXpr;
  typedef blas_traits<NestedXpr> Base;
-  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
+  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, Xpr> XprType;
  typedef typename Base::ExtractType ExtractType;
  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x)
@@ -214,13 +230,14 @@ struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
 };

 // pop/push transpose
-template<typename NestedXpr>
-struct blas_traits<Transpose<NestedXpr> >
- : blas_traits<NestedXpr>
+template<typename Xpr>
+struct blas_traits<Transpose<Xpr> >
+ : blas_traits<typename internal::remove_all<typename Xpr::Nested>::type>
 {
+  typedef typename internal::remove_all<typename Xpr::Nested>::type NestedXpr;
  typedef typename NestedXpr::Scalar Scalar;
  typedef blas_traits<NestedXpr> Base;
-  typedef Transpose<NestedXpr> XprType;
+  typedef Transpose<Xpr> XprType;
  typedef Transpose<const typename Base::_ExtractType>  ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
  typedef Transpose<const typename Base::_ExtractType> _ExtractType;
  typedef typename conditional<bool(Base::HasUsableDirectAccess),
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -162,7 +162,7 @@ const unsigned int HereditaryBits = RowMajorBit
 /** \ingroup enums
  * Enum containing possible values for the \p Mode parameter of 
  * MatrixBase::selfadjointView() and MatrixBase::triangularView(). */
-enum {
+enum UpLoType {
  /** View matrix as a lower triangular matrix. */
  Lower=0x1,                      
  /** View matrix as an upper triangular matrix. */
@@ -187,7 +187,7 @@ enum {

 /** \ingroup enums
  * Enum for indicating whether an object is aligned or not. */
-enum { 
+enum AlignmentType {
  /** Object is not correctly aligned for vectorization. */
  Unaligned=0, 
  /** Object is aligned for vectorization. */
@@ -217,7 +217,7 @@ enum DirectionType {

 /** \internal \ingroup enums
  * Enum to specify how to traverse the entries of a matrix. */
-enum {
+enum TraversalType {
  /** \internal Default traversal, no vectorization, no index-based access */
  DefaultTraversal,
  /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
@@ -239,7 +239,7 @@ enum {

 /** \internal \ingroup enums
  * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */
-enum {
+enum UnrollingType {
  /** \internal Do not unroll loops. */
  NoUnrolling,
  /** \internal Unroll only the inner loop, but not the outer loop. */
@@ -251,7 +251,7 @@ enum {

 /** \internal \ingroup enums
  * Enum to specify whether to use the default (built-in) implementation or the specialization. */
-enum {
+enum SpecializedType {
  Specialized,
  BuiltIn
 };
@@ -259,7 +259,7 @@ enum {
 /** \ingroup enums
  * Enum containing possible values for the \p _Options template parameter of
  * Matrix, Array and BandMatrix. */
-enum {
+enum StorageOptions {
  /** Storage order is column major (see \ref TopicStorageOrders). */
  ColMajor = 0,
  /** Storage order is row major (see \ref TopicStorageOrders). */
@@ -272,7 +272,7 @@ enum {

 /** \ingroup enums
  * Enum for specifying whether to apply or solve on the left or right. */
-enum {
+enum SideType {
  /** Apply transformation on the left. */
  OnTheLeft = 1,  
  /** Apply transformation on the right. */
@@ -418,7 +418,7 @@ namespace Architecture

 /** \internal \ingroup enums
  * Enum used as template parameter in GeneralProduct. */
-enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
+enum ProductImplType { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };

 /** \internal \ingroup enums
  * Enum used in experimental parallel implementation. */
@@ -433,6 +433,19 @@ struct MatrixXpr {};
 /** The type used to identify an array expression */
 struct ArrayXpr {};

+namespace internal {
+  /** \internal
+  * Constants for comparison functors
+  */
+  enum ComparisonName {
+    cmp_EQ = 0,
+    cmp_LT = 1,
+    cmp_LE = 2,
+    cmp_UNORD = 3,
+    cmp_NEQ = 4
+  };
+}
+
 } // end namespace Eigen

 #endif // EIGEN_CONSTANTS_H
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -35,6 +35,14 @@
    #pragma clang diagnostic push
  #endif
  #pragma clang diagnostic ignored "-Wconstant-logical-operand"
+
+#elif defined __GNUC__ && __GNUC__>=6
+
+  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+    #pragma GCC diagnostic push
+  #endif
+  #pragma GCC diagnostic ignored "-Wignored-attributes"
+
 #endif

 #endif // not EIGEN_WARNINGS_DISABLED
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -235,6 +235,9 @@ template<typename Scalar> class Rotation2D;
 template<typename Scalar> class AngleAxis;
 template<typename Scalar,int Dim> class Translation;

+// Sparse module:
+template<typename Derived> class SparseMatrixBase;
+
 #ifdef EIGEN2_SUPPORT
 template<typename Derived, int _Dim> class eigen2_RotationBase;
 template<typename Lhs, typename Rhs> class eigen2_Cross;
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -76,6 +76,38 @@
 #include <mkl_lapacke.h>
 #define EIGEN_MKL_VML_THRESHOLD 128

+/* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */
+/* MKL_BLAS, etc are not defined in 11.2 */
+#ifdef MKL_DOMAIN_ALL
+#define EIGEN_MKL_DOMAIN_ALL MKL_DOMAIN_ALL
+#else
+#define EIGEN_MKL_DOMAIN_ALL MKL_ALL
+#endif
+
+#ifdef MKL_DOMAIN_BLAS
+#define EIGEN_MKL_DOMAIN_BLAS MKL_DOMAIN_BLAS
+#else
+#define EIGEN_MKL_DOMAIN_BLAS MKL_BLAS
+#endif
+
+#ifdef MKL_DOMAIN_FFT
+#define EIGEN_MKL_DOMAIN_FFT MKL_DOMAIN_FFT
+#else
+#define EIGEN_MKL_DOMAIN_FFT MKL_FFT
+#endif
+
+#ifdef MKL_DOMAIN_VML
+#define EIGEN_MKL_DOMAIN_VML MKL_DOMAIN_VML
+#else
+#define EIGEN_MKL_DOMAIN_VML MKL_VML
+#endif
+
+#ifdef MKL_DOMAIN_PARDISO
+#define EIGEN_MKL_DOMAIN_PARDISO MKL_DOMAIN_PARDISO
+#else
+#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO
+#endif
+
 namespace Eigen {

 typedef std::complex<double> dcomplex;
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,23 +13,292 @@

 #define EIGEN_WORLD_VERSION 3
 #define EIGEN_MAJOR_VERSION 2
-#define EIGEN_MINOR_VERSION 4
+#define EIGEN_MINOR_VERSION 10

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
                                                                 EIGEN_MINOR_VERSION>=z))))
+
+
+// Compiler identification, EIGEN_COMP_*
+
+/// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC
 #ifdef __GNUC__
+  #define EIGEN_COMP_GNUC 1
+#else
+  #define EIGEN_COMP_GNUC 0
+#endif
+
+/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__)
+#if defined(__clang__)
+  #define EIGEN_COMP_CLANG 1
+#else
+  #define EIGEN_COMP_CLANG 0
+#endif
+
+
+/// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm
+#if defined(__llvm__)
+  #define EIGEN_COMP_LLVM 1
+#else
+  #define EIGEN_COMP_LLVM 0
+#endif
+
+/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise
+#if defined(__INTEL_COMPILER)
+  #define EIGEN_COMP_ICC __INTEL_COMPILER
+#else
+  #define EIGEN_COMP_ICC 0
+#endif
+
+/// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw
+#if defined(__MINGW32__)
+  #define EIGEN_COMP_MINGW 1
+#else
+  #define EIGEN_COMP_MINGW 0
+#endif
+
+/// \internal EIGEN_COMP_SUNCC set to 1 if the compiler is Solaris Studio
+#if defined(__SUNPRO_CC)
+  #define EIGEN_COMP_SUNCC 1
+#else
+  #define EIGEN_COMP_SUNCC 0
+#endif
+
+/// \internal EIGEN_COMP_MSVC set to _MSC_VER if the compiler is Microsoft Visual C++, 0 otherwise.
+#if defined(_MSC_VER)
+  #define EIGEN_COMP_MSVC _MSC_VER
+#else
+  #define EIGEN_COMP_MSVC 0
+#endif
+
+/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC
+#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC)
+  #define EIGEN_COMP_MSVC_STRICT _MSC_VER
+#else
+  #define EIGEN_COMP_MSVC_STRICT 0
+#endif
+
+/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++
+#if defined(__IBMCPP__) || defined(__xlc__)
+  #define EIGEN_COMP_IBM 1
+#else
+  #define EIGEN_COMP_IBM 0
+#endif
+
+/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler
+#if defined(__PGI)
+  #define EIGEN_COMP_PGI 1
+#else
+  #define EIGEN_COMP_PGI 0
+#endif
+
+/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
+#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
+  #define EIGEN_COMP_ARM 1
+#else
+  #define EIGEN_COMP_ARM 0
+#endif
+
+
+/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM )
+  #define EIGEN_COMP_GNUC_STRICT 1
+#else
+  #define EIGEN_COMP_GNUC_STRICT 0
+#endif
+
+
+#if EIGEN_COMP_GNUC
  #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
+  #define EIGEN_GNUC_AT_MOST(x,y)  ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+  #define EIGEN_GNUC_AT(x,y)       ( __GNUC__==x && __GNUC_MINOR__==y )
 #else
  #define EIGEN_GNUC_AT_LEAST(x,y) 0
+  #define EIGEN_GNUC_AT_MOST(x,y)  0
+  #define EIGEN_GNUC_AT(x,y)       0
 #endif
- 
-#ifdef __GNUC__
-  #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+
+// FIXME: could probably be removed as we do not support gcc 3.x anymore
+#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
+#define EIGEN_GCC3_OR_OLDER 1
 #else
-  #define EIGEN_GNUC_AT_MOST(x,y) 0
+#define EIGEN_GCC3_OR_OLDER 0
 #endif

+
+// Architecture identification, EIGEN_ARCH_*
+
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
+  #define EIGEN_ARCH_x86_64 1
+#else
+  #define EIGEN_ARCH_x86_64 0
+#endif
+
+#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
+  #define EIGEN_ARCH_i386 1
+#else
+  #define EIGEN_ARCH_i386 0
+#endif
+
+#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
+  #define EIGEN_ARCH_i386_OR_x86_64 1
+#else
+  #define EIGEN_ARCH_i386_OR_x86_64 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM set to 1 if the architecture is ARM
+#if defined(__arm__)
+  #define EIGEN_ARCH_ARM 1
+#else
+  #define EIGEN_ARCH_ARM 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64
+#if defined(__aarch64__)
+  #define EIGEN_ARCH_ARM64 1
+#else
+  #define EIGEN_ARCH_ARM64 0
+#endif
+
+#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
+  #define EIGEN_ARCH_ARM_OR_ARM64 1
+#else
+  #define EIGEN_ARCH_ARM_OR_ARM64 0
+#endif
+
+/// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS
+#if defined(__mips__) || defined(__mips)
+  #define EIGEN_ARCH_MIPS 1
+#else
+  #define EIGEN_ARCH_MIPS 0
+#endif
+
+/// \internal EIGEN_ARCH_SPARC set to 1 if the architecture is SPARC
+#if defined(__sparc__) || defined(__sparc)
+  #define EIGEN_ARCH_SPARC 1
+#else
+  #define EIGEN_ARCH_SPARC 0
+#endif
+
+/// \internal EIGEN_ARCH_IA64 set to 1 if the architecture is Intel Itanium
+#if defined(__ia64__)
+  #define EIGEN_ARCH_IA64 1
+#else
+  #define EIGEN_ARCH_IA64 0
+#endif
+
+/// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+  #define EIGEN_ARCH_PPC 1
+#else
+  #define EIGEN_ARCH_PPC 0
+#endif
+
+
+
+// Operating system identification, EIGEN_OS_*
+
+/// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant
+#if defined(__unix__) || defined(__unix)
+  #define EIGEN_OS_UNIX 1
+#else
+  #define EIGEN_OS_UNIX 0
+#endif
+
+/// \internal EIGEN_OS_LINUX set to 1 if the OS is based on Linux kernel
+#if defined(__linux__)
+  #define EIGEN_OS_LINUX 1
+#else
+  #define EIGEN_OS_LINUX 0
+#endif
+
+/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
+// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
+#if defined(__ANDROID__) || defined(ANDROID)
+  #define EIGEN_OS_ANDROID 1
+#else
+  #define EIGEN_OS_ANDROID 0
+#endif
+
+/// \internal EIGEN_OS_GNULINUX set to 1 if the OS is GNU Linux and not Linux-based OS (e.g., not android)
+#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
+  #define EIGEN_OS_GNULINUX 1
+#else
+  #define EIGEN_OS_GNULINUX 0
+#endif
+
+/// \internal EIGEN_OS_BSD set to 1 if the OS is a BSD variant
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
+  #define EIGEN_OS_BSD 1
+#else
+  #define EIGEN_OS_BSD 0
+#endif
+
+/// \internal EIGEN_OS_MAC set to 1 if the OS is MacOS
+#if defined(__APPLE__)
+  #define EIGEN_OS_MAC 1
+#else
+  #define EIGEN_OS_MAC 0
+#endif
+
+/// \internal EIGEN_OS_QNX set to 1 if the OS is QNX
+#if defined(__QNX__)
+  #define EIGEN_OS_QNX 1
+#else
+  #define EIGEN_OS_QNX 0
+#endif
+
+/// \internal EIGEN_OS_WIN set to 1 if the OS is Windows based
+#if defined(_WIN32)
+  #define EIGEN_OS_WIN 1
+#else
+  #define EIGEN_OS_WIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN64 set to 1 if the OS is Windows 64bits
+#if defined(_WIN64)
+  #define EIGEN_OS_WIN64 1
+#else
+  #define EIGEN_OS_WIN64 0
+#endif
+
+/// \internal EIGEN_OS_WINCE set to 1 if the OS is Windows CE
+#if defined(_WIN32_WCE)
+  #define EIGEN_OS_WINCE 1
+#else
+  #define EIGEN_OS_WINCE 0
+#endif
+
+/// \internal EIGEN_OS_CYGWIN set to 1 if the OS is Windows/Cygwin
+#if defined(__CYGWIN__)
+  #define EIGEN_OS_CYGWIN 1
+#else
+  #define EIGEN_OS_CYGWIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN_STRICT set to 1 if the OS is really Windows and not some variants
+#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )
+  #define EIGEN_OS_WIN_STRICT 1
+#else
+  #define EIGEN_OS_WIN_STRICT 0
+#endif
+
+/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN
+#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
+  #define EIGEN_OS_SUN 1
+#else
+  #define EIGEN_OS_SUN 0
+#endif
+
+/// \internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris
+#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
+  #define EIGEN_OS_SOLARIS 1
+#else
+  #define EIGEN_OS_SOLARIS 0
+#endif
+
+
 #if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__)
  // see bug 89
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
@@ -37,12 +306,6 @@
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
 #endif

-#if defined(__GNUC__) && (__GNUC__ <= 3)
-#define EIGEN_GCC3_OR_OLDER 1
-#else
-#define EIGEN_GCC3_OR_OLDER 0
-#endif
-
 // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
 // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
 // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
@@ -96,6 +359,20 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
 #endif

+// A Clang feature extension to determine compiler features.
+// We use it to determine 'cxx_rvalue_references'
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
+// Do we support r-value references?
+#if (__has_feature(cxx_rvalue_references) || \
+     (defined(__cplusplus) && __cplusplus >= 201103L) || \
+     (defined(_MSC_VER) && _MSC_VER >= 1600))
+  #define EIGEN_HAVE_RVALUE_REFERENCES
+#endif
+
+
 // Cross compiler wrapper around LLVM's __has_builtin
 #ifdef __has_builtin
 #  define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
@@ -135,10 +412,11 @@
 // attribute to maximize inlining. This should only be used when really necessary: in particular,
 // it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
 // FIXME with the always_inline attribute,
-// gcc 3.4.x reports the following compilation error:
+// gcc 3.4.x and 4.1 reports the following compilation error:
 //   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
 //    : function body not available
-#if EIGEN_GNUC_AT_LEAST(4,0)
+//   See also bug 1367
+#if EIGEN_GNUC_AT_LEAST(4,2)
 #define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
 #else
 #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
@@ -278,6 +556,7 @@ namespace Eigen {
  #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
 #endif

+#define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)

 #if EIGEN_ALIGN_STATICALLY
@@ -332,8 +611,11 @@ namespace Eigen {
  }
 #endif

-#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
-  EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
+/** \internal
+ * \brief Macro to manually inherit assignment operators.
+ * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.
+ */
+#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)

 /**
 * Just a side note. Commenting within defines works only by documenting
@@ -405,6 +687,8 @@ namespace Eigen {
 #define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
                           : ((int)a >= (int)b) ? (int)a : (int)b)

+#define EIGEN_ADD_COST(a,b) int(a)==Dynamic || int(b)==Dynamic ? Dynamic : int(a)+int(b)
+
 #define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))

 #define EIGEN_IMPLIES(a,b) (!(a) || (b))
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -507,7 +507,12 @@ template<typename T> void smart_copy(const T* start, const T* end, T* target)

 template<typename T> struct smart_copy_helper<T,true> {
  static inline void run(const T* start, const T* end, T* target)
-  { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
+  {
+    std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start);
+    if(size==0) return;
+    eigen_internal_assert(start!=0 && end!=0 && target!=0);
+    memcpy(target, start, size);
+  }
 };

 template<typename T> struct smart_copy_helper<T,false> {
@@ -515,7 +520,6 @@ template<typename T> struct smart_copy_helper<T,false> {
  { std::copy(start, end, target); }
 };

-
 /*****************************************************************************
 *** Implementation of runtime stack allocation (falling back to malloc)    ***
 *****************************************************************************/
@@ -523,7 +527,7 @@ template<typename T> struct smart_copy_helper<T,false> {
 // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
 // to the appropriate stack allocation function
 #ifndef EIGEN_ALLOCA
-  #if (defined __linux__)
+  #if (defined __linux__) || (defined __APPLE__) || (defined alloca)
    #define EIGEN_ALLOCA alloca
  #elif defined(_MSC_VER)
    #define EIGEN_ALLOCA _alloca
@@ -630,6 +634,8 @@ template<typename T> class aligned_stack_memory_handler
      } \
      void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete(void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete[](void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
@@ -653,99 +659,60 @@ template<typename T> class aligned_stack_memory_handler

 /****************************************************************************/

+
 /** \class aligned_allocator
-* \ingroup Core_Module
-*
-* \brief STL compatible allocator to use with with 16 byte aligned types
-*
-* Example:
-* \code
-* // Matrix4f requires 16 bytes alignment:
-* std::map< int, Matrix4f, std::less<int>, 
-*           aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
-* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
-* std::map< int, Vector3f > my_map_vec3;
-* \endcode
-*
-* \sa \ref TopicStlContainers.
-*/
+  * \ingroup Core_Module
+  *
+  * \brief STL compatible allocator to use with with 16 byte aligned types
+  *
+  * Example:
+  * \code
+  * // Matrix4f requires 16 bytes alignment:
+  * std::map< int, Matrix4f, std::less<int>,
+  *           aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
+  * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
+  * std::map< int, Vector3f > my_map_vec3;
+  * \endcode
+  *
+  * \sa \blank \ref TopicStlContainers.
+  */
 template<class T>
-class aligned_allocator
+class aligned_allocator : public std::allocator<T>
 {
 public:
-    typedef size_t    size_type;
-    typedef std::ptrdiff_t difference_type;
-    typedef T*        pointer;
-    typedef const T*  const_pointer;
-    typedef T&        reference;
-    typedef const T&  const_reference;
-    typedef T         value_type;
+  typedef size_t          size_type;
+  typedef std::ptrdiff_t  difference_type;
+  typedef T*              pointer;
+  typedef const T*        const_pointer;
+  typedef T&              reference;
+  typedef const T&        const_reference;
+  typedef T               value_type;

-    template<class U>
-    struct rebind
-    {
-        typedef aligned_allocator<U> other;
-    };
+  template<class U>
+  struct rebind
+  {
+    typedef aligned_allocator<U> other;
+  };

-    pointer address( reference value ) const
-    {
-        return &value;
-    }
+  aligned_allocator() : std::allocator<T>() {}

-    const_pointer address( const_reference value ) const
-    {
-        return &value;
-    }
+  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}

-    aligned_allocator()
-    {
-    }
+  template<class U>
+  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}

-    aligned_allocator( const aligned_allocator& )
-    {
-    }
+  ~aligned_allocator() {}

-    template<class U>
-    aligned_allocator( const aligned_allocator<U>& )
-    {
-    }
+  pointer allocate(size_type num, const void* /*hint*/ = 0)
+  {
+    internal::check_size_for_overflow<T>(num);
+    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
+  }

-    ~aligned_allocator()
-    {
-    }
-
-    size_type max_size() const
-    {
-        return (std::numeric_limits<size_type>::max)();
-    }
-
-    pointer allocate( size_type num, const void* hint = 0 )
-    {
-        EIGEN_UNUSED_VARIABLE(hint);
-        internal::check_size_for_overflow<T>(num);
-        return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
-    }
-
-    void construct( pointer p, const T& value )
-    {
-        ::new( p ) T( value );
-    }
-
-    void destroy( pointer p )
-    {
-        p->~T();
-    }
-
-    void deallocate( pointer p, size_type /*num*/ )
-    {
-        internal::aligned_free( p );
-    }
-
-    bool operator!=(const aligned_allocator<T>& ) const
-    { return false; }
-
-    bool operator==(const aligned_allocator<T>& ) const
-    { return true; }
+  void deallocate(pointer p, size_type /*num*/)
+  {
+    internal::aligned_free(p);
+  }
 };

 //---------- Cache sizes ----------
--- a/Eigen/src/Core/util/ReenableStupidWarnings.h
+++ b/Eigen/src/Core/util/ReenableStupidWarnings.h
@@ -8,7 +8,10 @@
    #pragma warning pop
  #elif defined __clang__
    #pragma clang diagnostic pop
+  #elif defined __GNUC__ && __GNUC__>=6
+    #pragma GCC diagnostic pop
  #endif
+
 #endif

 #endif // EIGEN_WARNINGS_DISABLED
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -26,7 +26,7 @@

 #ifndef EIGEN_NO_STATIC_ASSERT

-  #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && (_MSC_VER >= 1600))
+  #if __has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)

    // if native static_assert is enabled, let's use it
    #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
@@ -92,7 +92,8 @@
        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
-        STORAGE_LAYOUT_DOES_NOT_MATCH
+        STORAGE_LAYOUT_DOES_NOT_MATCH,
+        CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
      };
    };

--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -366,17 +366,17 @@ struct dense_xpr_base<Derived, ArrayXpr>

 /** \internal Helper base class to add a scalar multiple operator
  * overloads for complex types */
-template<typename Derived,typename Scalar,typename OtherScalar,
+template<typename Derived, typename Scalar, typename OtherScalar, typename BaseType,
         bool EnableIt = !is_same<Scalar,OtherScalar>::value >
-struct special_scalar_op_base : public DenseCoeffsBase<Derived>
+struct special_scalar_op_base : public BaseType
 {
  // dummy operator* so that the
  // "using special_scalar_op_base::operator*" compiles
  void operator*() const;
 };

-template<typename Derived,typename Scalar,typename OtherScalar>
-struct special_scalar_op_base<Derived,Scalar,OtherScalar,true>  : public DenseCoeffsBase<Derived>
+template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType>
+struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true>  : public BaseType
 {
  const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
  operator*(const OtherScalar& scalar) const
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver
    }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
    EigenvectorType m_eivec;
    EigenvalueType m_eivalues;
    ComplexSchur<MatrixType> m_schur;
@@ -251,6 +257,8 @@ template<typename MatrixType>
 ComplexEigenSolver<MatrixType>& 
 ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
 {
+  check_template_parameters();
+  
  // this code is inspired from Jampack
  eigen_assert(matrix.cols() == matrix.rows());

--- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
@@ -45,7 +45,6 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
 ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
 { \
  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \
-  typedef MatrixType::Scalar Scalar; \
  typedef MatrixType::RealScalar RealScalar; \
  typedef std::complex<RealScalar> ComplexScalar; \
 \
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -298,6 +298,13 @@ template<typename _MatrixType> class EigenSolver
    void doComputeEigenvectors();

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+      EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
+    }
+    
    MatrixType m_eivec;
    EigenvalueType m_eivalues;
    bool m_isInitialized;
@@ -364,6 +371,8 @@ template<typename MatrixType>
 EigenSolver<MatrixType>& 
 EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
 {
+  check_template_parameters();
+  
  using std::sqrt;
  using std::abs;
  eigen_assert(matrix.cols() == matrix.rows());
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver
    }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+      EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
+    }
+    
    MatrixType m_eivec;
    ComplexVectorType m_alphas;
    VectorType m_betas;
@@ -290,6 +297,8 @@ template<typename MatrixType>
 GeneralizedEigenSolver<MatrixType>&
 GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
 {
+  check_template_parameters();
+  
  using std::sqrt;
  using std::abs;
  eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());
@@ -318,13 +327,33 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
      }
      else
      {
-        Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1));
-        Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1)));
-        m_alphas.coeffRef(i)   = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z);
-        m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z);
+        // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T
+        // From the eigen decomposition of T = U * E * U^-1,
+        // we can extract the eigenvalues of (U^-1 * S * U) / E
+        // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)].
+        // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00):
+
+        // T =  [a b ; 0 c]
+        // S =  [e f ; g h]
+        RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1);
+        RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1);
+        RealScalar d = c-a;
+        RealScalar gb = g*b;
+        Matrix<RealScalar,2,2> A;
+        A <<  (e*d-gb)*c,  ((e*b+f*d-h*b)*d-gb*b)*a,
+              g*c       ,  (gb+h*d)*a;
+
+        // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal,
+        // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00):
+
+        Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1));
+        Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1)));
+        m_alphas.coeffRef(i)   = ComplexScalar(A.coeff(i+1, i+1) + p, z);
+        m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z);
+
+        m_betas.coeffRef(i)   =
+        m_betas.coeffRef(i+1) = a*c*d;

-        m_betas.coeffRef(i)   = m_realQZ.matrixT().coeff(i,i);
-        m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i);
        i += 2;
      }
    }
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -240,10 +240,10 @@ namespace Eigen {
            m_S.coeffRef(i,j) = Scalar(0.0);
            m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint());
            m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint());
+            // update Q
+            if (m_computeQZ)
+              m_Q.applyOnTheRight(i-1,i,G);
          }
-          // update Q
-          if (m_computeQZ)
-            m_Q.applyOnTheRight(i-1,i,G);
          // kill T(i,i-1)
          if(m_T.coeff(i,i-1)!=Scalar(0))
          {
@@ -251,10 +251,10 @@ namespace Eigen {
            m_T.coeffRef(i,i-1) = Scalar(0.0);
            m_S.applyOnTheRight(i,i-1,G);
            m_T.topRows(i).applyOnTheRight(i,i-1,G);
+            // update Z
+            if (m_computeQZ)
+              m_Z.applyOnTheLeft(i,i-1,G.adjoint());
          }
-          // update Z
-          if (m_computeQZ)
-            m_Z.applyOnTheLeft(i,i-1,G.adjoint());
        }
      }
    }
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -234,7 +234,7 @@ template<typename _MatrixType> class RealSchur
    typedef Matrix<Scalar,3,1> Vector3s;

    Scalar computeNormOfT();
-    Index findSmallSubdiagEntry(Index iu, const Scalar& norm);
+    Index findSmallSubdiagEntry(Index iu);
    void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
    void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
    void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
@@ -286,7 +286,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
  {
    while (iu >= 0)
    {
-      Index il = findSmallSubdiagEntry(iu, norm);
+      Index il = findSmallSubdiagEntry(iu);

      // Check for convergence
      if (il == iu) // One root found
@@ -343,16 +343,14 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()

 /** \internal Look for single small sub-diagonal element and returns its index */
 template<typename MatrixType>
-inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& norm)
+inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu)
 {
  using std::abs;
  Index res = iu;
  while (res > 0)
  {
    Scalar s = abs(m_matT.coeff(res-1,res-1)) + abs(m_matT.coeff(res,res));
-    if (s == 0.0)
-      s = norm;
-    if (abs(m_matT.coeff(res,res-1)) < NumTraits<Scalar>::epsilon() * s)
+    if (abs(m_matT.coeff(res,res-1)) <= NumTraits<Scalar>::epsilon() * s)
      break;
    res--;
  }
@@ -457,9 +455,7 @@ inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const V
    const Scalar lhs = m_matT.coeff(im,im-1) * (abs(v.coeff(1)) + abs(v.coeff(2)));
    const Scalar rhs = v.coeff(0) * (abs(m_matT.coeff(im-1,im-1)) + abs(Tmm) + abs(m_matT.coeff(im+1,im+1)));
    if (abs(lhs) < NumTraits<Scalar>::epsilon() * rhs)
-    {
      break;
-    }
  }
 }

--- a/Eigen/src/Eigenvalues/RealSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h
@@ -44,10 +44,6 @@ template<> inline \
 RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
 RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
 { \
-  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \
-  typedef MatrixType::Scalar Scalar; \
-  typedef MatrixType::RealScalar RealScalar; \
-\
  eigen_assert(matrix.cols() == matrix.rows()); \
 \
  lapack_int n = matrix.cols(), sdim, info; \
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -80,6 +80,8 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
    /** \brief Scalar type for matrices of type \p _MatrixType. */
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::Index Index;
+    
+    typedef Matrix<Scalar,Size,Size,ColMajor,MaxColsAtCompileTime,MaxColsAtCompileTime> EigenvectorsType;

    /** \brief Real scalar type for \p _MatrixType.
      *
@@ -225,7 +227,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      *
      * \sa eigenvalues()
      */
-    const MatrixType& eigenvectors() const
+    const EigenvectorsType& eigenvectors() const
    {
      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
@@ -351,7 +353,12 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
    #endif // EIGEN2_SUPPORT

  protected:
-    MatrixType m_eivec;
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
+    EigenvectorsType m_eivec;
    RealVectorType m_eivalues;
    typename TridiagonalizationType::SubDiagonalType m_subdiag;
    ComputationInfo m_info;
@@ -376,7 +383,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
  * "implicit symmetric QR step with Wilkinson shift"
  */
 namespace internal {
-template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
+template<typename RealScalar, typename Scalar, typename Index>
 static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n);
 }

@@ -384,6 +391,8 @@ template<typename MatrixType>
 SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
 ::compute(const MatrixType& matrix, int options)
 {
+  check_template_parameters();
+  
  using std::abs;
  eigen_assert(matrix.cols() == matrix.rows());
  eigen_assert((options&~(EigVecMask|GenEigMask))==0
@@ -406,7 +415,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>

  // declare some aliases
  RealVectorType& diag = m_eivalues;
-  MatrixType& mat = m_eivec;
+  EigenvectorsType& mat = m_eivec;

  // map the matrix coefficients to [-1:1] to avoid over- and underflow.
  mat = matrix.template triangularView<Lower>();
@@ -442,7 +451,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
    while (start>0 && m_subdiag[start-1]!=0)
      start--;

-    internal::tridiagonal_qr_step<MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor>(diag.data(), m_subdiag.data(), start, end, computeEigenvectors ? m_eivec.data() : (Scalar*)0, n);
+    internal::tridiagonal_qr_step(diag.data(), m_subdiag.data(), start, end, computeEigenvectors ? m_eivec.data() : (Scalar*)0, n);
  }

  if (iter <= m_maxIterations * n)
@@ -490,7 +499,13 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
  typedef typename SolverType::MatrixType MatrixType;
  typedef typename SolverType::RealVectorType VectorType;
  typedef typename SolverType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  typedef typename SolverType::EigenvectorsType EigenvectorsType;
  
+  /** \internal
+   * Computes the roots of the characteristic polynomial of \a m.
+   * For numerical stability m.trace() should be near zero and to avoid over- or underflow m should be normalized.
+   */
  static inline void computeRoots(const MatrixType& m, VectorType& roots)
  {
    using std::sqrt;
@@ -510,158 +525,123 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
    // Construct the parameters used in classifying the roots of the equation
    // and in solving the equation for the roots in closed form.
    Scalar c2_over_3 = c2*s_inv3;
-    Scalar a_over_3 = (c1 - c2*c2_over_3)*s_inv3;
-    if (a_over_3 > Scalar(0))
+    Scalar a_over_3 = (c2*c2_over_3 - c1)*s_inv3;
+    if(a_over_3<Scalar(0))
      a_over_3 = Scalar(0);

    Scalar half_b = Scalar(0.5)*(c0 + c2_over_3*(Scalar(2)*c2_over_3*c2_over_3 - c1));

-    Scalar q = half_b*half_b + a_over_3*a_over_3*a_over_3;
-    if (q > Scalar(0))
+    Scalar q = a_over_3*a_over_3*a_over_3 - half_b*half_b;
+    if(q<Scalar(0))
      q = Scalar(0);

    // Compute the eigenvalues by solving for the roots of the polynomial.
-    Scalar rho = sqrt(-a_over_3);
-    Scalar theta = atan2(sqrt(-q),half_b)*s_inv3;
+    Scalar rho = sqrt(a_over_3);
+    Scalar theta = atan2(sqrt(q),half_b)*s_inv3;  // since sqrt(q) > 0, atan2 is in [0, pi] and theta is in [0, pi/3]
    Scalar cos_theta = cos(theta);
    Scalar sin_theta = sin(theta);
-    roots(0) = c2_over_3 + Scalar(2)*rho*cos_theta;
-    roots(1) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta);
-    roots(2) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta);
-
-    // Sort in increasing order.
-    if (roots(0) >= roots(1))
-      std::swap(roots(0),roots(1));
-    if (roots(1) >= roots(2))
-    {
-      std::swap(roots(1),roots(2));
-      if (roots(0) >= roots(1))
-        std::swap(roots(0),roots(1));
-    }
+    // roots are already sorted, since cos is monotonically decreasing on [0, pi]
+    roots(0) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta); // == 2*rho*cos(theta+2pi/3)
+    roots(1) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta); // == 2*rho*cos(theta+ pi/3)
+    roots(2) = c2_over_3 + Scalar(2)*rho*cos_theta;
  }
-  
+
+  static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative)
+  {
+    using std::abs;
+    Index i0;
+    // Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal):
+    mat.diagonal().cwiseAbs().maxCoeff(&i0);
+    // mat.col(i0) is a good candidate for an orthogonal vector to the current eigenvector,
+    // so let's save it:
+    representative = mat.col(i0);
+    Scalar n0, n1;
+    VectorType c0, c1;
+    n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm();
+    n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm();
+    if(n0>n1) res = c0/std::sqrt(n0);
+    else      res = c1/std::sqrt(n1);
+
+    return true;
+  }
+
  static inline void run(SolverType& solver, const MatrixType& mat, int options)
  {
-    using std::sqrt;
    eigen_assert(mat.cols() == 3 && mat.cols() == mat.rows());
    eigen_assert((options&~(EigVecMask|GenEigMask))==0
            && (options&EigVecMask)!=EigVecMask
            && "invalid option parameter");
    bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
    
-    MatrixType& eivecs = solver.m_eivec;
+    EigenvectorsType& eivecs = solver.m_eivec;
    VectorType& eivals = solver.m_eivalues;
  
-    // map the matrix coefficients to [-1:1] to avoid over- and underflow.
-    Scalar scale = mat.cwiseAbs().maxCoeff();
-    MatrixType scaledMat = mat / scale;
+    // Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow.
+    Scalar shift = mat.trace() / Scalar(3);
+    // TODO Avoid this copy. Currently it is necessary to suppress bogus values when determining maxCoeff and for computing the eigenvectors later
+    MatrixType scaledMat = mat.template selfadjointView<Lower>();
+    scaledMat.diagonal().array() -= shift;
+    Scalar scale = scaledMat.cwiseAbs().maxCoeff();
+    if(scale > 0) scaledMat /= scale;   // TODO for scale==0 we could save the remaining operations

    // compute the eigenvalues
    computeRoots(scaledMat,eivals);

-    // compute the eigen vectors
+    // compute the eigenvectors
    if(computeEigenvectors)
    {
-      Scalar safeNorm2 = Eigen::NumTraits<Scalar>::epsilon();
      if((eivals(2)-eivals(0))<=Eigen::NumTraits<Scalar>::epsilon())
      {
+        // All three eigenvalues are numerically the same
        eivecs.setIdentity();
      }
      else
      {
-        scaledMat = scaledMat.template selfadjointView<Lower>();
        MatrixType tmp;
        tmp = scaledMat;

+        // Compute the eigenvector of the most distinct eigenvalue
        Scalar d0 = eivals(2) - eivals(1);
        Scalar d1 = eivals(1) - eivals(0);
-        int k =  d0 > d1 ? 2 : 0;
-        d0 = d0 > d1 ? d0 : d1;
-
-        tmp.diagonal().array () -= eivals(k);
-        VectorType cross;
-        Scalar n;
-        n = (cross = tmp.row(0).cross(tmp.row(1))).squaredNorm();
-
-        if(n>safeNorm2)
+        Index k(0), l(2);
+        if(d0 > d1)
        {
-          eivecs.col(k) = cross / sqrt(n);
+          std::swap(k,l);
+          d0 = d1;
+        }
+
+        // Compute the eigenvector of index k
+        {
+          tmp.diagonal().array () -= eivals(k);
+          // By construction, 'tmp' is of rank 2, and its kernel corresponds to the respective eigenvector.
+          extract_kernel(tmp, eivecs.col(k), eivecs.col(l));
+        }
+
+        // Compute eigenvector of index l
+        if(d0<=2*Eigen::NumTraits<Scalar>::epsilon()*d1)
+        {
+          // If d0 is too small, then the two other eigenvalues are numerically the same,
+          // and thus we only have to ortho-normalize the near orthogonal vector we saved above.
+          eivecs.col(l) -= eivecs.col(k).dot(eivecs.col(l))*eivecs.col(l);
+          eivecs.col(l).normalize();
        }
        else
        {
-          n = (cross = tmp.row(0).cross(tmp.row(2))).squaredNorm();
+          tmp = scaledMat;
+          tmp.diagonal().array () -= eivals(l);

-          if(n>safeNorm2)
-          {
-            eivecs.col(k) = cross / sqrt(n);
-          }
-          else
-          {
-            n = (cross = tmp.row(1).cross(tmp.row(2))).squaredNorm();
-
-            if(n>safeNorm2)
-            {
-              eivecs.col(k) = cross / sqrt(n);
-            }
-            else
-            {
-              // the input matrix and/or the eigenvaues probably contains some inf/NaN,
-              // => exit
-              // scale back to the original size.
-              eivals *= scale;
-
-              solver.m_info = NumericalIssue;
-              solver.m_isInitialized = true;
-              solver.m_eigenvectorsOk = computeEigenvectors;
-              return;
-            }
-          }
+          VectorType dummy;
+          extract_kernel(tmp, eivecs.col(l), dummy);
        }

-        tmp = scaledMat;
-        tmp.diagonal().array() -= eivals(1);
-
-        if(d0<=Eigen::NumTraits<Scalar>::epsilon())
-        {
-          eivecs.col(1) = eivecs.col(k).unitOrthogonal();
-        }
-        else
-        {
-          n = (cross = eivecs.col(k).cross(tmp.row(0))).squaredNorm();
-          if(n>safeNorm2)
-          {
-            eivecs.col(1) = cross / sqrt(n);
-          }
-          else
-          {
-            n = (cross = eivecs.col(k).cross(tmp.row(1))).squaredNorm();
-            if(n>safeNorm2)
-              eivecs.col(1) = cross / sqrt(n);
-            else
-            {
-              n = (cross = eivecs.col(k).cross(tmp.row(2))).squaredNorm();
-              if(n>safeNorm2)
-                eivecs.col(1) = cross / sqrt(n);
-              else
-              {
-                // we should never reach this point,
-                // if so the last two eigenvalues are likely to be very close to each other
-                eivecs.col(1) = eivecs.col(k).unitOrthogonal();
-              }
-            }
-          }
-
-          // make sure that eivecs[1] is orthogonal to eivecs[2]
-          // FIXME: this step should not be needed
-          Scalar d = eivecs.col(1).dot(eivecs.col(k));
-          eivecs.col(1) = (eivecs.col(1) - d * eivecs.col(k)).normalized();
-        }
-
-        eivecs.col(k==2 ? 0 : 2) = eivecs.col(k).cross(eivecs.col(1)).normalized();
+        // Compute last eigenvector from the other two
+        eivecs.col(1) = eivecs.col(2).cross(eivecs.col(0)).normalized();
      }
    }
+
    // Rescale back to the original size.
    eivals *= scale;
+    eivals.array() += shift;
    
    solver.m_info = Success;
    solver.m_isInitialized = true;
@@ -675,11 +655,12 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,2
  typedef typename SolverType::MatrixType MatrixType;
  typedef typename SolverType::RealVectorType VectorType;
  typedef typename SolverType::Scalar Scalar;
+  typedef typename SolverType::EigenvectorsType EigenvectorsType;
  
  static inline void computeRoots(const MatrixType& m, VectorType& roots)
  {
    using std::sqrt;
-    const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*m(1,0)*m(1,0));
+    const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*numext::abs2(m(1,0)));
    const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1));
    roots(0) = t1 - t0;
    roots(1) = t1 + t0;
@@ -688,13 +669,15 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,2
  static inline void run(SolverType& solver, const MatrixType& mat, int options)
  {
    using std::sqrt;
+    using std::abs;
+
    eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());
    eigen_assert((options&~(EigVecMask|GenEigMask))==0
            && (options&EigVecMask)!=EigVecMask
            && "invalid option parameter");
    bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
    
-    MatrixType& eivecs = solver.m_eivec;
+    EigenvectorsType& eivecs = solver.m_eivec;
    VectorType& eivals = solver.m_eivalues;
  
    // map the matrix coefficients to [-1:1] to avoid over- and underflow.
@@ -708,22 +691,29 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,2
    // compute the eigen vectors
    if(computeEigenvectors)
    {
-      scaledMat.diagonal().array () -= eivals(1);
-      Scalar a2 = numext::abs2(scaledMat(0,0));
-      Scalar c2 = numext::abs2(scaledMat(1,1));
-      Scalar b2 = numext::abs2(scaledMat(1,0));
-      if(a2>c2)
+      if((eivals(1)-eivals(0))<=abs(eivals(1))*Eigen::NumTraits<Scalar>::epsilon())
      {
-        eivecs.col(1) << -scaledMat(1,0), scaledMat(0,0);
-        eivecs.col(1) /= sqrt(a2+b2);
+        eivecs.setIdentity();
      }
      else
      {
-        eivecs.col(1) << -scaledMat(1,1), scaledMat(1,0);
-        eivecs.col(1) /= sqrt(c2+b2);
-      }
+        scaledMat.diagonal().array () -= eivals(1);
+        Scalar a2 = numext::abs2(scaledMat(0,0));
+        Scalar c2 = numext::abs2(scaledMat(1,1));
+        Scalar b2 = numext::abs2(scaledMat(1,0));
+        if(a2>c2)
+        {
+          eivecs.col(1) << -scaledMat(1,0), scaledMat(0,0);
+          eivecs.col(1) /= sqrt(a2+b2);
+        }
+        else
+        {
+          eivecs.col(1) << -scaledMat(1,1), scaledMat(1,0);
+          eivecs.col(1) /= sqrt(c2+b2);
+        }

-      eivecs.col(0) << eivecs.col(1).unitOrthogonal();
+        eivecs.col(0) << eivecs.col(1).unitOrthogonal();
+      }
    }
    
    // Rescale back to the original size.
@@ -746,7 +736,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
 }

 namespace internal {
-template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
+template<typename RealScalar, typename Scalar, typename Index>
 static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n)
 {
  using std::abs;
@@ -798,8 +788,7 @@ static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index sta
    // apply the givens rotation to the unit matrix Q = Q * G
    if (matrixQ)
    {
-      // FIXME if StorageOrder == RowMajor this operation is not very efficient
-      Map<Matrix<Scalar,Dynamic,Dynamic,StorageOrder> > q(matrixQ,n,n);
+      Map<Matrix<Scalar,Dynamic,Dynamic,ColMajor> > q(matrixQ,n,n);
      q.applyOnTheRight(k,k+1,rot);
    }
  }
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -367,10 +367,10 @@ void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs)
    hCoeffs.tail(n-i-1).noalias() = (matA.bottomRightCorner(remainingSize,remainingSize).template selfadjointView<Lower>()
                                  * (conj(h) * matA.col(i).tail(remainingSize)));

-    hCoeffs.tail(n-i-1) += (conj(h)*Scalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);
+    hCoeffs.tail(n-i-1) += (conj(h)*RealScalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);

    matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView<Lower>()
-      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), -1);
+      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1));

    matA.col(i).coeffRef(i+1) = beta;
    hCoeffs.coeffRef(i) = h;
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -19,10 +19,12 @@ namespace Eigen {
  *
  * \brief An axis aligned box
  *
-  * \param _Scalar the type of the scalar coefficients
-  * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+  * \tparam _Scalar the type of the scalar coefficients
+  * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
  *
  * This class represents an axis aligned box as a pair of the minimal and maximal corners.
+  * \warning The result of most methods is undefined when applied to an empty box. You can check for empty boxes using isEmpty().
+  * \sa alignedboxtypedefs
  */
 template <typename _Scalar, int _AmbientDim>
 class AlignedBox
@@ -40,18 +42,21 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
  /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */
  enum CornerType
  {
-    /** 1D names */
+    /** 1D names @{ */
    Min=0, Max=1,
+    /** @} */

-    /** Added names for 2D */
+    /** Identifier for 2D corner @{ */
    BottomLeft=0, BottomRight=1,
    TopLeft=2, TopRight=3,
+    /** @} */

-    /** Added names for 3D */
+    /** Identifier for 3D corner  @{ */
    BottomLeftFloor=0, BottomRightFloor=1,
    TopLeftFloor=2, TopRightFloor=3,
    BottomLeftCeil=4, BottomRightCeil=5,
    TopLeftCeil=6, TopRightCeil=7
+    /** @} */
  };


@@ -63,34 +68,33 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
  inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
  { setEmpty(); }

-  /** Constructs a box with extremities \a _min and \a _max. */
+  /** Constructs a box with extremities \a _min and \a _max.
+   * \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */
  template<typename OtherVectorType1, typename OtherVectorType2>
  inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}

  /** Constructs a box containing a single point \a p. */
  template<typename Derived>
-  inline explicit AlignedBox(const MatrixBase<Derived>& a_p)
-  {
-    typename internal::nested<Derived,2>::type p(a_p.derived());
-    m_min = p;
-    m_max = p;
-  }
+  inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
+  { }

  ~AlignedBox() {}

  /** \returns the dimension in which the box holds */
  inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }

-  /** \deprecated use isEmpty */
+  /** \deprecated use isEmpty() */
  inline bool isNull() const { return isEmpty(); }

-  /** \deprecated use setEmpty */
+  /** \deprecated use setEmpty() */
  inline void setNull() { setEmpty(); }

-  /** \returns true if the box is empty. */
+  /** \returns true if the box is empty.
+   * \sa setEmpty */
  inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }

-  /** Makes \c *this an empty box. */
+  /** Makes \c *this an empty box.
+   * \sa isEmpty */
  inline void setEmpty()
  {
    m_min.setConstant( ScalarTraits::highest() );
@@ -159,7 +163,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
   * a uniform distribution */
  inline VectorType sample() const
  {
-    VectorType r;
+    VectorType r(dim());
    for(Index d=0; d<dim(); ++d)
    {
      if(!ScalarTraits::IsInteger)
@@ -175,27 +179,34 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)

  /** \returns true if the point \a p is inside the box \c *this. */
  template<typename Derived>
-  inline bool contains(const MatrixBase<Derived>& a_p) const
+  inline bool contains(const MatrixBase<Derived>& p) const
  {
-    typename internal::nested<Derived,2>::type p(a_p.derived());
-    return (m_min.array()<=p.array()).all() && (p.array()<=m_max.array()).all();
+    typename internal::nested<Derived,2>::type p_n(p.derived());
+    return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();
  }

  /** \returns true if the box \a b is entirely inside the box \c *this. */
  inline bool contains(const AlignedBox& b) const
  { return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }

-  /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. */
+  /** \returns true if the box \a b is intersecting the box \c *this.
+   * \sa intersection, clamp */
+  inline bool intersects(const AlignedBox& b) const
+  { return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }
+
+  /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this.
+   * \sa extend(const AlignedBox&) */
  template<typename Derived>
-  inline AlignedBox& extend(const MatrixBase<Derived>& a_p)
+  inline AlignedBox& extend(const MatrixBase<Derived>& p)
  {
-    typename internal::nested<Derived,2>::type p(a_p.derived());
-    m_min = m_min.cwiseMin(p);
-    m_max = m_max.cwiseMax(p);
+    typename internal::nested<Derived,2>::type p_n(p.derived());
+    m_min = m_min.cwiseMin(p_n);
+    m_max = m_max.cwiseMax(p_n);
    return *this;
  }

-  /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. */
+  /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this.
+   * \sa merged, extend(const MatrixBase&) */
  inline AlignedBox& extend(const AlignedBox& b)
  {
    m_min = m_min.cwiseMin(b.m_min);
@@ -203,7 +214,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
    return *this;
  }

-  /** Clamps \c *this by the box \a b and returns a reference to \c *this. */
+  /** Clamps \c *this by the box \a b and returns a reference to \c *this.
+   * \note If the boxes don't intersect, the resulting box is empty.
+   * \sa intersection(), intersects() */
  inline AlignedBox& clamp(const AlignedBox& b)
  {
    m_min = m_min.cwiseMax(b.m_min);
@@ -211,11 +224,15 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
    return *this;
  }

-  /** Returns an AlignedBox that is the intersection of \a b and \c *this */
+  /** Returns an AlignedBox that is the intersection of \a b and \c *this
+   * \note If the boxes don't intersect, the resulting box is empty.
+   * \sa intersects(), clamp, contains()  */
  inline AlignedBox intersection(const AlignedBox& b) const
  {return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }

-  /** Returns an AlignedBox that is the union of \a b and \c *this */
+  /** Returns an AlignedBox that is the union of \a b and \c *this.
+   * \note Merging with an empty box may result in a box bigger than \c *this. 
+   * \sa extend(const AlignedBox&) */
  inline AlignedBox merged(const AlignedBox& b) const
  { return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }

@@ -231,20 +248,20 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)

  /** \returns the squared distance between the point \a p and the box \c *this,
    * and zero if \a p is inside the box.
-    * \sa exteriorDistance()
+    * \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)
    */
  template<typename Derived>
-  inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& a_p) const;
+  inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;

  /** \returns the squared distance between the boxes \a b and \c *this,
    * and zero if the boxes intersect.
-    * \sa exteriorDistance()
+    * \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)
    */
  inline Scalar squaredExteriorDistance(const AlignedBox& b) const;

  /** \returns the distance between the point \a p and the box \c *this,
    * and zero if \a p is inside the box.
-    * \sa squaredExteriorDistance()
+    * \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)
    */
  template<typename Derived>
  inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
@@ -252,7 +269,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)

  /** \returns the distance between the boxes \a b and \c *this,
    * and zero if the boxes intersect.
-    * \sa squaredExteriorDistance()
+    * \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)
    */
  inline NonInteger exteriorDistance(const AlignedBox& b) const
  { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); }
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -83,10 +83,17 @@ public:
  template<typename Derived>
  inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }

+  /** \returns the value of the rotation angle in radian */
  Scalar angle() const { return m_angle; }
+  /** \returns a read-write reference to the stored angle in radian */
  Scalar& angle() { return m_angle; }

+  /** \returns the rotation axis */
  const Vector3& axis() const { return m_axis; }
+  /** \returns a read-write reference to the stored rotation axis.
+    *
+    * \warning The rotation axis must remain a \b unit vector.
+    */
  Vector3& axis() { return m_axis; }

  /** Concatenates two rotations */
@@ -131,7 +138,7 @@ public:
    m_angle = Scalar(other.angle());
  }

-  static inline const AngleAxis Identity() { return AngleAxis(0, Vector3::UnitX()); }
+  static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }

  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
    * determined by \a prec.
@@ -165,8 +172,8 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived
  Scalar n2 = q.vec().squaredNorm();
  if (n2 < NumTraits<Scalar>::dummy_precision()*NumTraits<Scalar>::dummy_precision())
  {
-    m_angle = 0;
-    m_axis << 1, 0, 0;
+    m_angle = Scalar(0);
+    m_axis << Scalar(1), Scalar(0), Scalar(0);
  }
  else
  {
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -75,11 +75,11 @@ template<typename MatrixType,int _Direction> class Homogeneous
    inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical   ? 1 : 0); }
    inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }

-    inline Scalar coeff(Index row, Index col) const
+    inline Scalar coeff(Index row, Index col=0) const
    {
      if(  (int(Direction)==Vertical   && row==m_matrix.rows())
        || (int(Direction)==Horizontal && col==m_matrix.cols()))
-        return 1;
+        return Scalar(1);
      return m_matrix.coeff(row, col);
    }

--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -218,7 +218,10 @@ public:
  inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
  {
    if (traits==Affine)
+    {
      normal() = mat.inverse().transpose() * normal();
+      m_coeffs /= normal().norm();
+    }
    else if (traits==Isometry)
      normal() = mat * normal();
    else
--- a/Eigen/src/Geometry/ParametrizedLine.h
+++ b/Eigen/src/Geometry/ParametrizedLine.h
@@ -129,7 +129,7 @@ public:
    * determined by \a prec.
    *
    * \sa MatrixBase::isApprox() */
-  bool isApprox(const ParametrizedLine& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const
+  bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
  { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }

 protected:
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -102,11 +102,11 @@ public:
  /** \returns a quaternion representing an identity rotation
    * \sa MatrixBase::Identity()
    */
-  static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(1, 0, 0, 0); }
+  static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }

  /** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
    */
-  inline QuaternionBase& setIdentity() { coeffs() << 0, 0, 0, 1; return *this; }
+  inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }

  /** \returns the squared norm of the quaternion's coefficients
    * \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
@@ -161,7 +161,7 @@ public:
  { return coeffs().isApprox(other.coeffs(), prec); }

 	/** return the result vector of \a v through the rotation*/
-  EIGEN_STRONG_INLINE Vector3 _transformVector(Vector3 v) const;
+  EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;

  /** \returns \c *this with scalar type casted to \a NewScalarType
    *
@@ -231,7 +231,7 @@ class Quaternion : public QuaternionBase<Quaternion<_Scalar,_Options> >
 public:
  typedef _Scalar Scalar;

-  EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Quaternion)
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Quaternion)
  using Base::operator*=;

  typedef typename internal::traits<Quaternion>::Coefficients Coefficients;
@@ -276,7 +276,7 @@ public:
  inline Coefficients& coeffs() { return m_coeffs;}
  inline const Coefficients& coeffs() const { return m_coeffs;}

-  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(IsAligned)
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(IsAligned))

 protected:
  Coefficients m_coeffs;
@@ -341,7 +341,7 @@ class Map<const Quaternion<_Scalar>, _Options >
  public:
    typedef _Scalar Scalar;
    typedef typename internal::traits<Map>::Coefficients Coefficients;
-    EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Map)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
    using Base::operator*=;

    /** Constructs a Mapped Quaternion object from the pointer \a coeffs
@@ -378,7 +378,7 @@ class Map<Quaternion<_Scalar>, _Options >
  public:
    typedef _Scalar Scalar;
    typedef typename internal::traits<Map>::Coefficients Coefficients;
-    EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Map)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
    using Base::operator*=;

    /** Constructs a Mapped Quaternion object from the pointer \a coeffs
@@ -461,7 +461,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
  */
 template <class Derived>
 EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
-QuaternionBase<Derived>::_transformVector(Vector3 v) const
+QuaternionBase<Derived>::_transformVector(const Vector3& v) const
 {
    // Note that this algorithm comes from the optimization by hand
    // of the conversion to a Matrix followed by a Matrix/Vector product.
@@ -637,7 +637,7 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der
 {
  // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
  Scalar n2 = this->squaredNorm();
-  if (n2 > 0)
+  if (n2 > Scalar(0))
    return Quaternion<Scalar>(conjugate().coeffs() / n2);
  else
  {
@@ -667,12 +667,10 @@ template <class OtherDerived>
 inline typename internal::traits<Derived>::Scalar
 QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
 {
-  using std::acos;
+  using std::atan2;
  using std::abs;
-  Scalar d = abs(this->dot(other));
-  if (d>=Scalar(1))
-    return Scalar(0);
-  return Scalar(2) * acos(d);
+  Quaternion<Scalar> d = (*this) * other.conjugate();
+  return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) );
 }

 
@@ -712,7 +710,7 @@ QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerive
    scale0 = sin( ( Scalar(1) - t ) * theta) / sinTheta;
    scale1 = sin( ( t * theta) ) / sinTheta;
  }
-  if(d<0) scale1 = -scale1;
+  if(d<Scalar(0)) scale1 = -scale1;

  return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs());
 }
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -102,15 +102,15 @@ template<int Mode> struct transform_make_affine;
  *
  * However, unlike a plain matrix, the Transform class provides many features
  * simplifying both its assembly and usage. In particular, it can be composed
-  * with any other transformations (Transform,Translation,RotationBase,Matrix)
+  * with any other transformations (Transform,Translation,RotationBase,DiagonalMatrix)
  * and can be directly used to transform implicit homogeneous vectors. All these
  * operations are handled via the operator*. For the composition of transformations,
  * its principle consists to first convert the right/left hand sides of the product
  * to a compatible (Dim+1)^2 matrix and then perform a pure matrix product.
  * Of course, internally, operator* tries to perform the minimal number of operations
  * according to the nature of each terms. Likewise, when applying the transform
-  * to non homogeneous vectors, the latters are automatically promoted to homogeneous
-  * one before doing the matrix product. The convertions to homogeneous representations
+  * to points, the latters are automatically promoted to homogeneous vectors
+  * before doing the matrix product. The conventions to homogeneous representations
  * are performed as follow:
  *
  * \b Translation t (Dim)x(1):
@@ -124,7 +124,7 @@ template<int Mode> struct transform_make_affine;
  * R & 0\\
  * 0\,...\,0 & 1
  * \end{array} \right) \f$
-  *
+  *<!--
  * \b Linear \b Matrix L (Dim)x(Dim):
  * \f$ \left( \begin{array}{cc}
  * L & 0\\
@@ -136,14 +136,20 @@ template<int Mode> struct transform_make_affine;
  * A\\
  * 0\,...\,0\,1
  * \end{array} \right) \f$
+  *-->
+  * \b Scaling \b DiagonalMatrix S (Dim)x(Dim):
+  * \f$ \left( \begin{array}{cc}
+  * S & 0\\
+  * 0\,...\,0 & 1
+  * \end{array} \right) \f$
  *
-  * \b Column \b vector v (Dim)x(1):
+  * \b Column \b point v (Dim)x(1):
  * \f$ \left( \begin{array}{c}
  * v\\
  * 1
  * \end{array} \right) \f$
  *
-  * \b Set \b of \b column \b vectors V1...Vn (Dim)x(n):
+  * \b Set \b of \b column \b points V1...Vn (Dim)x(n):
  * \f$ \left( \begin{array}{ccc}
  * v_1 & ... & v_n\\
  * 1 & ... & 1
@@ -384,26 +390,39 @@ public:
  /** \returns a writable expression of the translation vector of the transformation */
  inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }

-  /** \returns an expression of the product between the transform \c *this and a matrix expression \a other
+  /** \returns an expression of the product between the transform \c *this and a matrix expression \a other.
    *
-    * The right hand side \a other might be either:
-    * \li a vector of size Dim,
+    * The right-hand-side \a other can be either:
    * \li an homogeneous vector of size Dim+1,
-    * \li a set of vectors of size Dim x Dynamic,
-    * \li a set of homogeneous vectors of size Dim+1 x Dynamic,
-    * \li a linear transformation matrix of size Dim x Dim,
-    * \li an affine transformation matrix of size Dim x Dim+1,
+    * \li a set of homogeneous vectors of size Dim+1 x N,
    * \li a transformation matrix of size Dim+1 x Dim+1.
+    *
+    * Moreover, if \c *this represents an affine transformation (i.e., Mode!=Projective), then \a other can also be:
+    * \li a point of size Dim (computes: \code this->linear() * other + this->translation()\endcode),
+    * \li a set of N points as a Dim x N matrix (computes: \code (this->linear() * other).colwise() + this->translation()\endcode),
+    *
+    * In all cases, the return type is a matrix or vector of same sizes as the right-hand-side \a other.
+    *
+    * If you want to interpret \a other as a linear or affine transformation, then first convert it to a Transform<> type,
+    * or do your own cooking.
+    *
+    * Finally, if you want to apply Affine transformations to vectors, then explicitly apply the linear part only:
+    * \code
+    * Affine3f A;
+    * Vector3f v1, v2;
+    * v2 = A.linear() * v1;
+    * \endcode
+    *
    */
  // note: this function is defined here because some compilers cannot find the respective declaration
  template<typename OtherDerived>
-  EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
+  EIGEN_STRONG_INLINE const typename OtherDerived::PlainObject
  operator * (const EigenBase<OtherDerived> &other) const
  { return internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }

  /** \returns the product expression of a transformation matrix \a a times a transform \a b
    *
-    * The left hand side \a other might be either:
+    * The left hand side \a other can be either:
    * \li a linear transformation matrix of size Dim x Dim,
    * \li an affine transformation matrix of size Dim x Dim+1,
    * \li a general transformation matrix of size Dim+1 x Dim+1.
@@ -424,7 +443,7 @@ public:
    operator * (const DiagonalBase<DiagonalDerived> &b) const
  {
    TransformTimeDiagonalReturnType res(*this);
-    res.linear() *= b;
+    res.linearExt() *= b;
    return res;
  }

@@ -538,7 +557,7 @@ public:
    return res;
  }

-  inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linear() *= s; return *this; }
+  inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }

  template<typename Derived>
  inline Transform& operator=(const RotationBase<Derived,Dim>& r);
@@ -809,7 +828,7 @@ Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &oth
 {
  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
-  m_matrix.template block<Dim,HDim>(0,0).noalias() = (other.asDiagonal() * m_matrix.template block<Dim,HDim>(0,0));
+  affine().noalias() = (other.asDiagonal() * affine());
  return *this;
 }

@@ -1029,7 +1048,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixTy
  }
 }

-/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being
+/** decomposes the linear part of the transformation as a product scaling x rotation, the scaling being
  * not necessarily positive.
  *
  * If either pointer is zero, the corresponding computation is skipped.
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -130,8 +130,10 @@ public:
  }

  /** Applies translation to vector */
-  inline VectorType operator* (const VectorType& other) const
-  { return m_coeffs + other; }
+  template<typename Derived>
+  inline typename internal::enable_if<Derived::IsVectorAtCompileTime,VectorType>::type
+  operator* (const MatrixBase<Derived>& vec) const
+  { return m_coeffs + vec.derived(); }

  /** \returns the inverse translation (opposite) */
  Translation inverse() const { return Translation(-m_coeffs); }
@@ -162,7 +164,7 @@ public:
    * determined by \a prec.
    *
    * \sa MatrixBase::isApprox() */
-  bool isApprox(const Translation& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const
+  bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
  { return m_coeffs.isApprox(other.m_coeffs, prec); }

 };
--- a/Eigen/src/Householder/Householder.h
+++ b/Eigen/src/Householder/Householder.h
@@ -75,8 +75,9 @@ void MatrixBase<Derived>::makeHouseholder(
  
  RealScalar tailSqNorm = size()==1 ? RealScalar(0) : tail.squaredNorm();
  Scalar c0 = coeff(0);
+  const RealScalar tol = (std::numeric_limits<RealScalar>::min)();

-  if(tailSqNorm == RealScalar(0) && numext::imag(c0)==RealScalar(0))
+  if(tailSqNorm <= tol && numext::abs2(numext::imag(c0))<=tol)
  {
    tau = RealScalar(0);
    beta = numext::real(c0);
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@@ -237,8 +237,9 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
    {
      workspace.resize(rows());
      Index vecs = m_length;
+      const typename Dest::Scalar *dst_data = internal::extract_data(dst);
      if(    internal::is_same<typename internal::remove_all<VectorsType>::type,Dest>::value
-          && internal::extract_data(dst) == internal::extract_data(m_vectors))
+          && dst_data!=0 && dst_data == internal::extract_data(m_vectors))
      {
        // in-place
        dst.diagonal().setOnes();
--- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
+++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
@@ -65,10 +65,10 @@ class DiagonalPreconditioner
      {
        typename MatType::InnerIterator it(mat,j);
        while(it && it.index()!=j) ++it;
-        if(it && it.index()==j)
+        if(it && it.index()==j && it.value()!=Scalar(0))
          m_invdiag(j) = Scalar(1)/it.value();
        else
-          m_invdiag(j) = 0;
+          m_invdiag(j) = Scalar(1);
      }
      m_isInitialized = true;
      return *this;
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -151,20 +151,7 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
  * \endcode
  * 
  * By default the iterations start with x=0 as an initial guess of the solution.
-  * One can control the start using the solveWithGuess() method. Here is a step by
-  * step execution example starting with a random guess and printing the evolution
-  * of the estimated error:
-  * * \code
-  * x = VectorXd::Random(n);
-  * solver.setMaxIterations(1);
-  * int i = 0;
-  * do {
-  *   x = solver.solveWithGuess(b,x);
-  *   std::cout << i << " : " << solver.error() << std::endl;
-  *   ++i;
-  * } while (solver.info()!=Success && i<100);
-  * \endcode
-  * Note that such a step by step excution is slightly slower.
+  * One can control the start using the solveWithGuess() method.
  * 
  * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
  */
@@ -199,7 +186,8 @@ public:
    * this class becomes invalid. Call compute() to update it with the new
    * matrix A, or modify a copy of A.
    */
-  BiCGSTAB(const MatrixType& A) : Base(A) {}
+  template<typename MatrixDerived>
+  explicit BiCGSTAB(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {}

  ~BiCGSTAB() {}
  
--- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
+++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
@@ -112,9 +112,9 @@ struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
  * This class allows to solve for A.x = b sparse linear problems using a conjugate gradient algorithm.
  * The sparse matrix A must be selfadjoint. The vectors x and b can be either dense or sparse.
  *
-  * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.
-  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
-  *               or Upper. Default is Lower.
+  * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix.
+  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower,
+  *               Upper, or Lower|Upper in which the full matrix entries will be considered. Default is Lower.
  * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
  *
  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
@@ -137,21 +137,10 @@ struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
  * \endcode
  * 
  * By default the iterations start with x=0 as an initial guess of the solution.
-  * One can control the start using the solveWithGuess() method. Here is a step by
-  * step execution example starting with a random guess and printing the evolution
-  * of the estimated error:
-  * * \code
-  * x = VectorXd::Random(n);
-  * cg.setMaxIterations(1);
-  * int i = 0;
-  * do {
-  *   x = cg.solveWithGuess(b,x);
-  *   std::cout << i << " : " << cg.error() << std::endl;
-  *   ++i;
-  * } while (cg.info()!=Success && i<100);
-  * \endcode
-  * Note that such a step by step excution is slightly slower.
+  * One can control the start using the solveWithGuess() method.
  * 
+  * ConjugateGradient can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink.
+  *
  * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
  */
 template< typename _MatrixType, int _UpLo, typename _Preconditioner>
@@ -189,7 +178,8 @@ public:
    * this class becomes invalid. Call compute() to update it with the new
    * matrix A, or modify a copy of A.
    */
-  ConjugateGradient(const MatrixType& A) : Base(A) {}
+  template<typename MatrixDerived>
+  explicit ConjugateGradient(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {}

  ~ConjugateGradient() {}
  
@@ -213,6 +203,10 @@ public:
  template<typename Rhs,typename Dest>
  void _solveWithGuess(const Rhs& b, Dest& x) const
  {
+    typedef typename internal::conditional<UpLo==(Lower|Upper),
+                                           const MatrixType&,
+                                           SparseSelfAdjointView<const MatrixType, UpLo>
+                                          >::type MatrixWrapperType;
    m_iterations = Base::maxIterations();
    m_error = Base::m_tolerance;

@@ -222,8 +216,7 @@ public:
      m_error = Base::m_tolerance;

      typename Dest::ColXpr xj(x,j);
-      internal::conjugate_gradient(mp_matrix->template selfadjointView<UpLo>(), b.col(j), xj,
-                                   Base::m_preconditioner, m_iterations, m_error);
+      internal::conjugate_gradient(MatrixWrapperType(*mp_matrix), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
    }

    m_isInitialized = true;
@@ -234,7 +227,7 @@ public:
  template<typename Rhs,typename Dest>
  void _solve(const Rhs& b, Dest& x) const
  {
-    x.setOnes();
+    x.setZero();
    _solveWithGuess(b,x);
  }

--- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
@@ -150,7 +150,6 @@ class IncompleteLUT : internal::noncopyable
    {
      analyzePattern(amat); 
      factorize(amat);
-      m_isInitialized = m_factorizationIsOk;
      return *this;
    }

@@ -160,7 +159,7 @@ class IncompleteLUT : internal::noncopyable
    template<typename Rhs, typename Dest>
    void _solve(const Rhs& b, Dest& x) const
    {
-      x = m_Pinv * b;  
+      x = m_Pinv * b;
      x = m_lu.template triangularView<UnitLower>().solve(x);
      x = m_lu.template triangularView<Upper>().solve(x);
      x = m_P * x; 
@@ -223,18 +222,29 @@ template<typename _MatrixType>
 void IncompleteLUT<Scalar>::analyzePattern(const _MatrixType& amat)
 {
  // Compute the Fill-reducing permutation
+  // Since ILUT does not perform any numerical pivoting,
+  // it is highly preferable to keep the diagonal through symmetric permutations.
+#ifndef EIGEN_MPL2_ONLY
+  // To this end, let's symmetrize the pattern and perform AMD on it.
  SparseMatrix<Scalar,ColMajor, Index> mat1 = amat;
  SparseMatrix<Scalar,ColMajor, Index> mat2 = amat.transpose();
-  // Symmetrize the pattern
  // FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice.
  //       on the other hand for a really non-symmetric pattern, mat2*mat1 should be prefered...
  SparseMatrix<Scalar,ColMajor, Index> AtA = mat2 + mat1;
-  AtA.prune(keep_diag());
-  internal::minimum_degree_ordering<Scalar, Index>(AtA, m_P);  // Then compute the AMD ordering...
-
-  m_Pinv  = m_P.inverse(); // ... and the inverse permutation
+  AMDOrdering<Index> ordering;
+  ordering(AtA,m_P);
+  m_Pinv  = m_P.inverse(); // cache the inverse permutation
+#else
+  // If AMD is not available, (MPL2-only), then let's use the slower COLAMD routine.
+  SparseMatrix<Scalar,ColMajor, Index> mat1 = amat;
+  COLAMDOrdering<Index> ordering;
+  ordering(mat1,m_Pinv);
+  m_P = m_Pinv.inverse();
+#endif

  m_analysisIsOk = true;
+  m_factorizationIsOk = false;
+  m_isInitialized = false;
 }

 template <typename Scalar>
@@ -442,6 +452,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
  m_lu.makeCompressed();

  m_factorizationIsOk = true;
+  m_isInitialized = m_factorizationIsOk;
  m_info = Success;
 }

--- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
+++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
@@ -49,10 +49,11 @@ public:
    * this class becomes invalid. Call compute() to update it with the new
    * matrix A, or modify a copy of A.
    */
-  IterativeSolverBase(const MatrixType& A)
+  template<typename InputDerived>
+  IterativeSolverBase(const EigenBase<InputDerived>& A)
  {
    init();
-    compute(A);
+    compute(A.derived());
  }

  ~IterativeSolverBase() {}
@@ -62,9 +63,11 @@ public:
    * Currently, this function mostly call analyzePattern on the preconditioner. In the future
    * we might, for instance, implement column reodering for faster matrix vector products.
    */
-  Derived& analyzePattern(const MatrixType& A)
+  template<typename InputDerived>
+  Derived& analyzePattern(const EigenBase<InputDerived>& A)
  {
-    m_preconditioner.analyzePattern(A);
+    grabInput(A.derived());
+    m_preconditioner.analyzePattern(*mp_matrix);
    m_isInitialized = true;
    m_analysisIsOk = true;
    m_info = Success;
@@ -80,11 +83,12 @@ public:
    * this class becomes invalid. Call compute() to update it with the new
    * matrix A, or modify a copy of A.
    */
-  Derived& factorize(const MatrixType& A)
+  template<typename InputDerived>
+  Derived& factorize(const EigenBase<InputDerived>& A)
  {
+    grabInput(A.derived());
    eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); 
-    mp_matrix = &A;
-    m_preconditioner.factorize(A);
+    m_preconditioner.factorize(*mp_matrix);
    m_factorizationIsOk = true;
    m_info = Success;
    return derived();
@@ -100,10 +104,11 @@ public:
    * this class becomes invalid. Call compute() to update it with the new
    * matrix A, or modify a copy of A.
    */
-  Derived& compute(const MatrixType& A)
+  template<typename InputDerived>
+  Derived& compute(const EigenBase<InputDerived>& A)
  {
-    mp_matrix = &A;
-    m_preconditioner.compute(A);
+    grabInput(A.derived());
+    m_preconditioner.compute(*mp_matrix);
    m_isInitialized = true;
    m_analysisIsOk = true;
    m_factorizationIsOk = true;
@@ -212,6 +217,28 @@ public:
  }

 protected:
+
+  template<typename InputDerived>
+  void grabInput(const EigenBase<InputDerived>& A)
+  {
+    // we const cast to prevent the creation of a MatrixType temporary by the compiler.
+    grabInput_impl(A.const_cast_derived());
+  }
+
+  template<typename InputDerived>
+  void grabInput_impl(const EigenBase<InputDerived>& A)
+  {
+    m_copyMatrix = A;
+    mp_matrix = &m_copyMatrix;
+  }
+
+  void grabInput_impl(MatrixType& A)
+  {
+    if(MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==Dynamic)
+      m_copyMatrix.resize(0,0);
+    mp_matrix = &A;
+  }
+
  void init()
  {
    m_isInitialized = false;
@@ -220,6 +247,7 @@ protected:
    m_maxIterations = -1;
    m_tolerance = NumTraits<Scalar>::epsilon();
  }
+  MatrixType m_copyMatrix;
  const MatrixType* mp_matrix;
  Preconditioner m_preconditioner;

--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -374,6 +374,12 @@ template<typename _MatrixType> class FullPivLU
    inline Index cols() const { return m_lu.cols(); }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
    MatrixType m_lu;
    PermutationPType m_p;
    PermutationQType m_q;
@@ -418,6 +424,8 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
 template<typename MatrixType>
 FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
 {
+  check_template_parameters();
+  
  // the permutations are stored as int indices, so just to be sure:
  eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
  
@@ -680,7 +688,7 @@ struct solve_retval<FullPivLU<_MatrixType>, Rhs>
     */

    const Index rows = dec().rows(), cols = dec().cols(),
-              nonzero_pivots = dec().nonzeroPivots();
+              nonzero_pivots = dec().rank();
    eigen_assert(rhs().rows() == rows);
    const Index smalldim = (std::min)(rows, cols);

--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -290,7 +290,7 @@ struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
  {
    const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime);
    EIGEN_ONLY_USED_FOR_DEBUG(Size);
-    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst)))
+    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=0 && extract_data(m_matrix)!=extract_data(dst)))
              && "Aliasing problem detected in inverse(), you need to do inverse().eval() here.");

    compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst);
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -171,6 +171,12 @@ template<typename _MatrixType> class PartialPivLU
    inline Index cols() const { return m_lu.cols(); }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
    MatrixType m_lu;
    PermutationType m_p;
    TranspositionType m_rowsTranspositions;
@@ -386,6 +392,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
 template<typename MatrixType>
 PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
 {
+  check_template_parameters();
+  
  // the row permutation is stored as int indices, so just to be sure:
  eigen_assert(matrix.rows()<NumTraits<int>::highest());
  
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -151,10 +151,12 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
    iC = _mm_mul_ps(rd,iC);
    iD = _mm_mul_ps(rd,iD);

-    result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77));
-    result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22));
-    result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77));
-    result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22));
+    DenseIndex res_stride = result.outerStride();
+    float* res = result.data();
+    pstoret<float, Packet4f, ResultAlignment>(res+0,            _mm_shuffle_ps(iA,iB,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+res_stride,   _mm_shuffle_ps(iA,iB,0x22));
+    pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
  }

 };
@@ -311,14 +313,16 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
    iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
    iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);

-    result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));     // iA# / det
-    result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
-    result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));     // iB# / det
-    result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
-    result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));     // iC# / det
-    result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
-    result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));     // iD# / det
-    result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
+    DenseIndex res_stride = result.outerStride();
+    double* res = result.data();
+    pstoret<double, Packet2d, ResultAlignment>(res+0,             _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride,    _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2,             _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2,  _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
  }
 };

--- a/Eigen/src/OrderingMethods/Amd.h
+++ b/Eigen/src/OrderingMethods/Amd.h
@@ -8,7 +8,7 @@
 NOTE: this routine has been adapted from the CSparse library:

 Copyright (c) 2006, Timothy A. Davis.
-http://www.cise.ufl.edu/research/sparse/CSparse
+http://www.suitesparse.com

 CSparse is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
@@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, Permutation
    degree[i] = len[i];                 // degree of node i
  }
  mark = internal::cs_wclear<Index>(0, 0, w, n);         /* clear w */
-  elen[n] = -2;                         /* n is a dead element */
-  Cp[n] = -1;                           /* n is a root of assembly tree */
-  w[n] = 0;                             /* n is a dead element */
  
  /* --- Initialize degree lists ------------------------------------------ */
  for(i = 0; i < n; i++)
  {
+    bool has_diag = false;
+    for(p = Cp[i]; p<Cp[i+1]; ++p)
+      if(Ci[p]==i)
+      {
+        has_diag = true;
+        break;
+      }
+   
    d = degree[i];
-    if(d == 0)                         /* node i is empty */
+    if(d == 1 && has_diag)           /* node i is empty */
    {
      elen[i] = -2;                 /* element i is dead */
      nel++;
      Cp[i] = -1;                   /* i is a root of assembly tree */
      w[i] = 0;
    }
-    else if(d > dense)                 /* node i is dense */
+    else if(d > dense || !has_diag)  /* node i is dense or has no structural diagonal element */
    {
      nv[i] = 0;                    /* absorb i into element n */
      elen[i] = -1;                 /* node i is dead */
@@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, Permutation
    }
  }
  
+  elen[n] = -2;                         /* n is a dead element */
+  Cp[n] = -1;                           /* n is a root of assembly tree */
+  w[n] = 0;                             /* n is a dead element */
+  
  while (nel < n)                         /* while (selecting pivots) do */
  {
    /* --- Select node of minimum approximate degree -------------------- */
--- a/Eigen/src/OrderingMethods/Eigen_Colamd.h
+++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h
@@ -41,12 +41,8 @@
 // 
 //   The colamd/symamd library is available at
 // 
-//       http://www.cise.ufl.edu/research/sparse/colamd/
+//       http://www.suitesparse.com

-//   This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
-//   file.  It is required by the colamd.c, colamdmex.c, and symamdmex.c
-//   files, and by any C code that calls the routines whose prototypes are
-//   listed below, or that uses the colamd/symamd definitions listed below.
  
 #ifndef EIGEN_COLAMD_H
 #define EIGEN_COLAMD_H
@@ -102,9 +98,6 @@ namespace internal {
 /* === Definitions ========================================================== */
 /* ========================================================================== */

-#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b))
-#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b))
-
 #define ONES_COMPLEMENT(r) (-(r)-1)

 /* -------------------------------------------------------------------------- */
@@ -516,7 +509,7 @@ static Index init_rows_cols  /* returns true if OK, or false otherwise */
    Col [col].start = p [col] ;
    Col [col].length = p [col+1] - p [col] ;

-    if (Col [col].length < 0)
+    if ((Col [col].length) < 0) // extra parentheses to work-around gcc bug 10200
    {
      /* column pointers must be non-decreasing */
      stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
@@ -739,8 +732,8 @@ static void init_scoring

  /* === Extract knobs ==================================================== */

-  dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
-  dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
+  dense_row_count = std::max<Index>(0, (std::min)(Index(knobs [COLAMD_DENSE_ROW] * n_col), n_col)) ;
+  dense_col_count = std::max<Index>(0, (std::min)(Index(knobs [COLAMD_DENSE_COL] * n_row), n_row)) ;
  COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
  max_deg = 0 ;
  n_col2 = n_col ;
@@ -804,7 +797,7 @@ static void init_scoring
    else
    {
      /* keep track of max degree of remaining rows */
-      max_deg = COLAMD_MAX (max_deg, deg) ;
+      max_deg = (std::max)(max_deg, deg) ;
    }
  }
  COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
@@ -842,7 +835,7 @@ static void init_scoring
      /* add row's external degree */
      score += Row [row].shared1.degree - 1 ;
      /* guard against integer overflow */
-      score = COLAMD_MIN (score, n_col) ;
+      score = (std::min)(score, n_col) ;
    }
    /* determine pruned column length */
    col_length = (Index) (new_cp - &A [Col [c].start]) ;
@@ -914,7 +907,7 @@ static void init_scoring
      head [score] = c ;

      /* see if this score is less than current min */
-      min_score = COLAMD_MIN (min_score, score) ;
+      min_score = (std::min)(min_score, score) ;


    }
@@ -1040,7 +1033,7 @@ static Index find_ordering /* return the number of garbage collections */

    /* === Garbage_collection, if necessary ============================= */

-    needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ;
+    needed_memory = (std::min)(pivot_col_score, n_col - k) ;
    if (pfree + needed_memory >= Alen)
    {
      pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
@@ -1099,7 +1092,7 @@ static Index find_ordering /* return the number of garbage collections */

    /* clear tag on pivot column */
    Col [pivot_col].shared1.thickness = pivot_col_thickness ;
-    max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ;
+    max_deg = (std::max)(max_deg, pivot_row_degree) ;


    /* === Kill all rows used to construct pivot row ==================== */
@@ -1273,7 +1266,7 @@ static Index find_ordering /* return the number of garbage collections */
 	/* add set difference */
 	cur_score += row_mark - tag_mark ;
 	/* integer overflow... */
-	cur_score = COLAMD_MIN (cur_score, n_col) ;
+	cur_score = (std::min)(cur_score, n_col) ;
      }

      /* recompute the column's length */
@@ -1386,7 +1379,7 @@ static Index find_ordering /* return the number of garbage collections */
      cur_score -= Col [col].shared1.thickness ;

      /* make sure score is less or equal than the max score */
-      cur_score = COLAMD_MIN (cur_score, max_score) ;
+      cur_score = (std::min)(cur_score, max_score) ;
      COLAMD_ASSERT (cur_score >= 0) ;

      /* store updated score */
@@ -1409,7 +1402,7 @@ static Index find_ordering /* return the number of garbage collections */
      head [cur_score] = col ;

      /* see if this score is less than current min */
-      min_score = COLAMD_MIN (min_score, cur_score) ;
+      min_score = (std::min)(min_score, cur_score) ;

    }

--- a/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h
@@ -10,6 +10,14 @@
 #ifndef EIGEN_PASTIXSUPPORT_H
 #define EIGEN_PASTIXSUPPORT_H

+#if defined(DCOMPLEX)
+  #define PASTIX_COMPLEX  COMPLEX
+  #define PASTIX_DCOMPLEX DCOMPLEX
+#else
+  #define PASTIX_COMPLEX  std::complex<float>
+  #define PASTIX_DCOMPLEX std::complex<double>
+#endif
+
 namespace Eigen { 

 /** \ingroup PaStiXSupport_Module
@@ -74,14 +82,14 @@ namespace internal
  {
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
    if (nbrhs == 0) {x = NULL; nbrhs=1;}
-    c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<COMPLEX*>(vals), perm, invp, reinterpret_cast<COMPLEX*>(x), nbrhs, iparm, dparm); 
+    c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm);
  }
  
  void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
  {
    if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
    if (nbrhs == 0) {x = NULL; nbrhs=1;}
-    z_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<DCOMPLEX*>(vals), perm, invp, reinterpret_cast<DCOMPLEX*>(x), nbrhs, iparm, dparm); 
+    z_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_DCOMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_DCOMPLEX*>(x), nbrhs, iparm, dparm);
  }

  // Convert the matrix  to Fortran-style Numbering
--- a/Eigen/src/PardisoSupport/PardisoSupport.h
+++ b/Eigen/src/PardisoSupport/PardisoSupport.h
@@ -221,11 +221,11 @@ class PardisoImpl
      m_type = type;
      bool symmetric = std::abs(m_type) < 10;
      m_iparm[0] = 1;   // No solver default
-      m_iparm[1] = 3;   // use Metis for the ordering
-      m_iparm[2] = 1;   // Numbers of processors, value of OMP_NUM_THREADS
+      m_iparm[1] = 2;   // use Metis for the ordering
+      m_iparm[2] = 0;   // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??)
      m_iparm[3] = 0;   // No iterative-direct algorithm
      m_iparm[4] = 0;   // No user fill-in reducing permutation
-      m_iparm[5] = 0;   // Write solution into x
+      m_iparm[5] = 0;   // Write solution into x, b is left unchanged
      m_iparm[6] = 0;   // Not in use
      m_iparm[7] = 2;   // Max numbers of iterative refinement steps
      m_iparm[8] = 0;   // Not in use
@@ -246,7 +246,10 @@ class PardisoImpl
      m_iparm[26] = 0;  // No matrix checker
      m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0;
      m_iparm[34] = 1;  // C indexing
-      m_iparm[59] = 1;  // Automatic switch between In-Core and Out-of-Core modes
+      m_iparm[36] = 0;  // CSR
+      m_iparm[59] = 0;  // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core
+      
+      memset(m_pt, 0, sizeof(m_pt));
    }

  protected:
@@ -384,7 +387,6 @@ bool PardisoImpl<Base>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerive
                                                     m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
                                                     m_perm.data(), nrhs, m_iparm.data(), m_msglvl,
                                                     rhs_ptr, x.derived().data());
-
  return error==0;
 }

@@ -397,6 +399,9 @@ bool PardisoImpl<Base>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerive
  * using the Intel MKL PARDISO library. The sparse matrix A must be squared and invertible.
  * The vectors or matrices X and B can be either dense or sparse.
  *
+  * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
+  * \code solver.pardisoParameterArray()[59] = 1; \endcode
+  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  *
  * \sa \ref TutorialSparseDirectSolvers
@@ -447,6 +452,9 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
  * using the Intel MKL PARDISO library. The sparse matrix A must be selfajoint and positive definite.
  * The vectors or matrices X and B can be either dense or sparse.
  *
+  * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
+  * \code solver.pardisoParameterArray()[59] = 1; \endcode
+  *
  * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used.
  *         Upper|Lower can be used to tell both triangular parts can be used as input.
@@ -507,6 +515,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
  * For complex matrices, A can also be symmetric only, see the \a Options template parameter.
  * The vectors or matrices X and B can be either dense or sparse.
  *
+  * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
+  * \code solver.pardisoParameterArray()[59] = 1; \endcode
+  *
  * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used.
  *         Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix.
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -127,9 +127,6 @@ template<typename _MatrixType> class ColPivHouseholderQR
      *
      * \returns a solution.
      *
-      * \note The case where b is a matrix is not yet implemented. Also, this
-      *       code is space inefficient.
-      *
      * \note_about_checking_solutions
      *
      * \note_about_arbitrary_choice_of_solution
@@ -384,6 +381,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
    }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
    MatrixType m_qr;
    HCoeffsType m_hCoeffs;
    PermutationType m_colsPermutation;
@@ -422,6 +425,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDetermina
 template<typename MatrixType>
 ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
 {
+  check_template_parameters();
+  
  using std::abs;
  Index rows = matrix.rows();
  Index cols = matrix.cols();
@@ -463,20 +468,10 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
    // we store that back into our table: it can't hurt to correct our table.
    m_colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm;

-    // if the current biggest column is smaller than epsilon times the initial biggest column,
-    // terminate to avoid generating nan/inf values.
-    // Note that here, if we test instead for "biggest == 0", we get a failure every 1000 (or so)
-    // repetitions of the unit test, with the result of solve() filled with large values of the order
-    // of 1/(size*epsilon).
-    if(biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))
-    {
+    // Track the number of meaningful pivots but do not stop the decomposition to make
+    // sure that the initial matrix is properly reproduced. See bug 941.
+    if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))
      m_nonzero_pivots = k;
-      m_hCoeffs.tail(size-k).setZero();
-      m_qr.bottomRightCorner(rows-k,cols-k)
-          .template triangularView<StrictlyLower>()
-          .setZero();
-      break;
-    }

    // apply the transposition to the columns
    m_colsTranspositions.coeffRef(k) = biggest_col_index;
@@ -505,7 +500,7 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
  }

  m_colsPermutation.setIdentity(PermIndexType(cols));
-  for(PermIndexType k = 0; k < m_nonzero_pivots; ++k)
+  for(PermIndexType k = 0; k < size/*m_nonzero_pivots*/; ++k)
    m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k)));

  m_det_pq = (number_of_transpositions%2) ? -1 : 1;
@@ -555,13 +550,15 @@ struct solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>

 } // end namespace internal

-/** \returns the matrix Q as a sequence of householder transformations */
+/** \returns the matrix Q as a sequence of householder transformations.
+  * You can extract the meaningful part only by using:
+  * \code qr.householderQ().setLength(qr.nonzeroPivots()) \endcode*/
 template<typename MatrixType>
 typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHouseholderQR<MatrixType>
  ::householderQ() const
 {
  eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-  return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate()).setLength(m_nonzero_pivots);
+  return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
 }

 /** \return the column-pivoting Householder QR decomposition of \c *this.
--- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h
+++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h
@@ -49,7 +49,6 @@ ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynami
 { \
  using std::abs; \
  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \
-  typedef MatrixType::Scalar Scalar; \
  typedef MatrixType::RealScalar RealScalar; \
  Index rows = matrix.rows();\
  Index cols = matrix.cols();\
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -134,9 +134,6 @@ template<typename _MatrixType> class FullPivHouseholderQR
      * \returns the exact or least-square solution if the rank is greater or equal to the number of columns of A,
      * and an arbitrary solution otherwise.
      *
-      * \note The case where b is a matrix is not yet implemented. Also, this
-      *       code is space inefficient.
-      *
      * \note_about_checking_solutions
      *
      * \note_about_arbitrary_choice_of_solution
@@ -368,6 +365,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
    RealScalar maxPivot() const { return m_maxpivot; }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
    MatrixType m_qr;
    HCoeffsType m_hCoeffs;
    IntDiagSizeVectorType m_rows_transpositions;
@@ -407,6 +410,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDetermin
 template<typename MatrixType>
 FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
 {
+  check_template_parameters();
+  
  using std::abs;
  Index rows = matrix.rows();
  Index cols = matrix.cols();
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -107,9 +107,6 @@ template<typename _MatrixType> class HouseholderQR
      *
      * \returns a solution.
      *
-      * \note The case where b is a matrix is not yet implemented. Also, this
-      *       code is space inefficient.
-      *
      * \note_about_checking_solutions
      *
      * \note_about_arbitrary_choice_of_solution
@@ -189,6 +186,12 @@ template<typename _MatrixType> class HouseholderQR
    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }

  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
    MatrixType m_qr;
    HCoeffsType m_hCoeffs;
    RowVectorType m_temp;
@@ -251,56 +254,62 @@ void householder_qr_inplace_unblocked(MatrixQR& mat, HCoeffs& hCoeffs, typename
 }

 /** \internal */
-template<typename MatrixQR, typename HCoeffs>
-void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs,
-                                       typename MatrixQR::Index maxBlockSize=32,
-                                       typename MatrixQR::Scalar* tempData = 0)
+template<typename MatrixQR, typename HCoeffs,
+  typename MatrixQRScalar = typename MatrixQR::Scalar,
+  bool InnerStrideIsOne = (MatrixQR::InnerStrideAtCompileTime == 1 && HCoeffs::InnerStrideAtCompileTime == 1)>
+struct householder_qr_inplace_blocked
 {
-  typedef typename MatrixQR::Index Index;
-  typedef typename MatrixQR::Scalar Scalar;
-  typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
-
-  Index rows = mat.rows();
-  Index cols = mat.cols();
-  Index size = (std::min)(rows, cols);
-
-  typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;
-  TempType tempVector;
-  if(tempData==0)
+  // This is specialized for MKL-supported Scalar types in HouseholderQR_MKL.h
+  static void run(MatrixQR& mat, HCoeffs& hCoeffs,
+      typename MatrixQR::Index maxBlockSize=32,
+      typename MatrixQR::Scalar* tempData = 0)
  {
-    tempVector.resize(cols);
-    tempData = tempVector.data();
-  }
+    typedef typename MatrixQR::Index Index;
+    typedef typename MatrixQR::Scalar Scalar;
+    typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;

-  Index blockSize = (std::min)(maxBlockSize,size);
+    Index rows = mat.rows();
+    Index cols = mat.cols();
+    Index size = (std::min)(rows, cols);

-  Index k = 0;
-  for (k = 0; k < size; k += blockSize)
-  {
-    Index bs = (std::min)(size-k,blockSize);  // actual size of the block
-    Index tcols = cols - k - bs;            // trailing columns
-    Index brows = rows-k;                   // rows of the block
-
-    // partition the matrix:
-    //        A00 | A01 | A02
-    // mat  = A10 | A11 | A12
-    //        A20 | A21 | A22
-    // and performs the qr dec of [A11^T A12^T]^T
-    // and update [A21^T A22^T]^T using level 3 operations.
-    // Finally, the algorithm continue on A22
-
-    BlockType A11_21 = mat.block(k,k,brows,bs);
-    Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);
-
-    householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);
-
-    if(tcols)
+    typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;
+    TempType tempVector;
+    if(tempData==0)
    {
-      BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
-      apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
+      tempVector.resize(cols);
+      tempData = tempVector.data();
+    }
+
+    Index blockSize = (std::min)(maxBlockSize,size);
+
+    Index k = 0;
+    for (k = 0; k < size; k += blockSize)
+    {
+      Index bs = (std::min)(size-k,blockSize);  // actual size of the block
+      Index tcols = cols - k - bs;            // trailing columns
+      Index brows = rows-k;                   // rows of the block
+
+      // partition the matrix:
+      //        A00 | A01 | A02
+      // mat  = A10 | A11 | A12
+      //        A20 | A21 | A22
+      // and performs the qr dec of [A11^T A12^T]^T
+      // and update [A21^T A22^T]^T using level 3 operations.
+      // Finally, the algorithm continue on A22
+
+      BlockType A11_21 = mat.block(k,k,brows,bs);
+      Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);
+
+      householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);
+
+      if(tcols)
+      {
+        BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
+        apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
+      }
    }
  }
-}
+};

 template<typename _MatrixType, typename Rhs>
 struct solve_retval<HouseholderQR<_MatrixType>, Rhs>
@@ -343,6 +352,8 @@ struct solve_retval<HouseholderQR<_MatrixType>, Rhs>
 template<typename MatrixType>
 HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType& matrix)
 {
+  check_template_parameters();
+  
  Index rows = matrix.rows();
  Index cols = matrix.cols();
  Index size = (std::min)(rows,cols);
@@ -352,7 +363,7 @@ HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType&

  m_temp.resize(cols);

-  internal::householder_qr_inplace_blocked(m_qr, m_hCoeffs, 48, m_temp.data());
+  internal::householder_qr_inplace_blocked<MatrixType, HCoeffsType>::run(m_qr, m_hCoeffs, 48, m_temp.data());

  m_isInitialized = true;
  return *this;
--- a/Eigen/src/QR/HouseholderQR_MKL.h
+++ b/Eigen/src/QR/HouseholderQR_MKL.h
@@ -34,28 +34,30 @@
 #ifndef EIGEN_QR_MKL_H
 #define EIGEN_QR_MKL_H

-#include "Eigen/src/Core/util/MKL_support.h"
+#include "../Core/util/MKL_support.h"

 namespace Eigen { 

-namespace internal {
+  namespace internal {

-/** \internal Specialization for the data types supported by MKL */
+    /** \internal Specialization for the data types supported by MKL */

 #define EIGEN_MKL_QR_NOPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \
 template<typename MatrixQR, typename HCoeffs> \
-void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs, \
-                                       typename MatrixQR::Index maxBlockSize=32, \
-                                       EIGTYPE* tempData = 0) \
+struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> \
 { \
-  lapack_int m = mat.rows(); \
-  lapack_int n = mat.cols(); \
-  lapack_int lda = mat.outerStride(); \
-  lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
-  LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \
-  hCoeffs.adjointInPlace(); \
-\
-}
+  static void run(MatrixQR& mat, HCoeffs& hCoeffs, \
+      typename MatrixQR::Index = 32, \
+      typename MatrixQR::Scalar* = 0) \
+  { \
+    lapack_int m = (lapack_int) mat.rows(); \
+    lapack_int n = (lapack_int) mat.cols(); \
+    lapack_int lda = (lapack_int) mat.outerStride(); \
+    lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \
+    hCoeffs.adjointInPlace(); \
+  } \
+};

 EIGEN_MKL_QR_NOPIV(double, double, d)
 EIGEN_MKL_QR_NOPIV(float, float, s)
--- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
+++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
@@ -47,7 +47,7 @@ namespace Eigen {
 * You can then apply it to a vector.
 * 
 * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix.
- * NOTE : The Index type of R is always UF_long. You can get it with SPQR::Index
+ * NOTE : The Index type of R is always SuiteSparse_long. You can get it with SPQR::Index
 * 
 * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<>
 * NOTE 
@@ -59,24 +59,18 @@ class SPQR
  public:
    typedef typename _MatrixType::Scalar Scalar;
    typedef typename _MatrixType::RealScalar RealScalar;
-    typedef UF_long Index ; 
+    typedef SuiteSparse_long Index ;
    typedef SparseMatrix<Scalar, ColMajor, Index> MatrixType;
    typedef PermutationMatrix<Dynamic, Dynamic> PermutationType;
  public:
    SPQR() 
-      : m_isInitialized(false),
-      m_ordering(SPQR_ORDERING_DEFAULT),
-      m_allow_tol(SPQR_DEFAULT_TOL),
-      m_tolerance (NumTraits<Scalar>::epsilon())
+      : m_isInitialized(false), m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon()), m_useDefaultThreshold(true)
    { 
      cholmod_l_start(&m_cc);
    }
    
-    SPQR(const _MatrixType& matrix) 
-    : m_isInitialized(false),
-      m_ordering(SPQR_ORDERING_DEFAULT),
-      m_allow_tol(SPQR_DEFAULT_TOL),
-      m_tolerance (NumTraits<Scalar>::epsilon())
+    SPQR(const _MatrixType& matrix)
+    : m_isInitialized(false), m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon()), m_useDefaultThreshold(true)
    {
      cholmod_l_start(&m_cc);
      compute(matrix);
@@ -101,15 +95,31 @@ class SPQR
      if(m_isInitialized) SPQR_free();

      MatrixType mat(matrix);
+      
+      /* Compute the default threshold as in MatLab, see:
+       * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing
+       * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3 
+       */
+      RealScalar pivotThreshold = m_tolerance;
+      if(m_useDefaultThreshold) 
+      {
+        using std::max;
+        RealScalar max2Norm = 0.0;
+        for (int j = 0; j < mat.cols(); j++) max2Norm = (max)(max2Norm, mat.col(j).norm());
+        if(max2Norm==RealScalar(0))
+          max2Norm = RealScalar(1);
+        pivotThreshold = 20 * (mat.rows() + mat.cols()) * max2Norm * NumTraits<RealScalar>::epsilon();
+      }
      cholmod_sparse A; 
      A = viewAsCholmod(mat);
+      m_rows = matrix.rows();
      Index col = matrix.cols();
-      m_rank = SuiteSparseQR<Scalar>(m_ordering, m_tolerance, col, &A, 
+      m_rank = SuiteSparseQR<Scalar>(m_ordering, pivotThreshold, col, &A, 
                             &m_cR, &m_E, &m_H, &m_HPinv, &m_HTau, &m_cc);

      if (!m_cR)
      {
-        m_info = NumericalIssue; 
+        m_info = NumericalIssue;
        m_isInitialized = false;
        return;
      }
@@ -120,7 +130,7 @@ class SPQR
    /** 
     * Get the number of rows of the input matrix and the Q matrix
     */
-    inline Index rows() const {return m_H->nrow; }
+    inline Index rows() const {return m_rows; }
    
    /** 
     * Get the number of columns of the input matrix. 
@@ -145,16 +155,25 @@ class SPQR
    {
      eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
      eigen_assert(b.cols()==1 && "This method is for vectors only");
-      
+
      //Compute Q^T * b
-      typename Dest::PlainObject y;
+      typename Dest::PlainObject y, y2;
      y = matrixQ().transpose() * b;
-        // Solves with the triangular matrix R
+      
+      // Solves with the triangular matrix R
      Index rk = this->rank();
-      y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y.topRows(rk));
-      y.bottomRows(cols()-rk).setZero();
+      y2 = y;
+      y.resize((std::max)(cols(),Index(y.rows())),y.cols());
+      y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y2.topRows(rk));
+
      // Apply the column permutation 
-      dest.topRows(cols()) = colsPermutation() * y.topRows(cols());
+      // colsPermutation() performs a copy of the permutation,
+      // so let's apply it manually:
+      for(Index i = 0; i < rk; ++i) dest.row(m_E[i]) = y.row(i);
+      for(Index i = rk; i < cols(); ++i) dest.row(m_E[i]).setZero();
+      
+//       y.bottomRows(y.rows()-rk).setZero();
+//       dest = colsPermutation() * y.topRows(cols());
      
      m_info = Success;
    }
@@ -197,7 +216,11 @@ class SPQR
    /// Set the fill-reducing ordering method to be used
    void setSPQROrdering(int ord) { m_ordering = ord;}
    /// Set the tolerance tol to treat columns with 2-norm < =tol as zero
-    void setPivotThreshold(const RealScalar& tol) { m_tolerance = tol; }
+    void setPivotThreshold(const RealScalar& tol)
+    {
+      m_useDefaultThreshold = false;
+      m_tolerance = tol;
+    }
    
    /** \returns a pointer to the SPQR workspace */
    cholmod_common *cholmodCommon() const { return &m_cc; }
@@ -230,6 +253,8 @@ class SPQR
    mutable cholmod_dense *m_HTau; // The Householder coefficients
    mutable Index m_rank; // The rank of the matrix
    mutable cholmod_common m_cc; // Workspace and parameters
+    bool m_useDefaultThreshold;     // Use default threshold
+    Index m_rows;
    template<typename ,typename > friend struct SPQR_QProduct;
 };

--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -359,29 +359,42 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, false
 {
  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
  typedef typename SVD::Index Index;
-  static void run(typename SVD::WorkMatrixType&, SVD&, Index, Index) {}
+  typedef typename MatrixType::RealScalar RealScalar;
+  static bool run(typename SVD::WorkMatrixType&, SVD&, Index, Index, RealScalar&) { return true; }
 };

 template<typename MatrixType, int QRPreconditioner>
 struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
 {
  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
+  typedef typename SVD::Index Index;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;
-  typedef typename SVD::Index Index;
-  static void run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q)
+  static bool run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q, RealScalar& maxDiagEntry)
  {
    using std::sqrt;
+    using std::abs;
+    using std::max;
    Scalar z;
    JacobiRotation<Scalar> rot;
    RealScalar n = sqrt(numext::abs2(work_matrix.coeff(p,p)) + numext::abs2(work_matrix.coeff(q,p)));
-    
+
+    const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+    const RealScalar precision = NumTraits<Scalar>::epsilon();
+
    if(n==0)
    {
-      z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
-      work_matrix.row(p) *= z;
-      if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);
-      if(work_matrix.coeff(q,q)!=Scalar(0))
+      // make sure first column is zero
+      work_matrix.coeffRef(p,p) = work_matrix.coeffRef(q,p) = Scalar(0);
+
+      if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)
+      {
+        // work_matrix.coeff(p,q) can be zero if work_matrix.coeff(q,p) is not zero but small enough to underflow when computing n
+        z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
+        work_matrix.row(p) *= z;
+        if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);
+      }
+      if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)
      {
        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
        work_matrix.row(q) *= z;
@@ -395,19 +408,25 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
      rot.s() = work_matrix.coeff(q,p) / n;
      work_matrix.applyOnTheLeft(p,q,rot);
      if(svd.computeU()) svd.m_matrixU.applyOnTheRight(p,q,rot.adjoint());
-      if(work_matrix.coeff(p,q) != Scalar(0))
+      if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)
      {
-        Scalar z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
+        z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
        work_matrix.col(q) *= z;
        if(svd.computeV()) svd.m_matrixV.col(q) *= z;
      }
-      if(work_matrix.coeff(q,q) != Scalar(0))
+      if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)
      {
        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
        work_matrix.row(q) *= z;
        if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
      }
    }
+
+    // update largest diagonal entry
+    maxDiagEntry = max EIGEN_EMPTY (maxDiagEntry,max EIGEN_EMPTY (abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));
+    // and check whether the 2x2 block is already diagonal
+    RealScalar threshold = max EIGEN_EMPTY (considerAsZero, precision * maxDiagEntry);
+    return abs(work_matrix.coeff(p,q))>threshold || abs(work_matrix.coeff(q,p)) > threshold;
  }
 };

@@ -424,22 +443,23 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
  JacobiRotation<RealScalar> rot1;
  RealScalar t = m.coeff(0,0) + m.coeff(1,1);
  RealScalar d = m.coeff(1,0) - m.coeff(0,1);
-  if(t == RealScalar(0))
+  if(d == RealScalar(0))
  {
-    rot1.c() = RealScalar(0);
-    rot1.s() = d > RealScalar(0) ? RealScalar(1) : RealScalar(-1);
+    rot1.s() = RealScalar(0);
+    rot1.c() = RealScalar(1);
  }
  else
  {
-    RealScalar t2d2 = numext::hypot(t,d);
-    rot1.c() = abs(t)/t2d2;
-    rot1.s() = d/t2d2;
-    if(t<RealScalar(0))
-      rot1.s() = -rot1.s();
+    // If d!=0, then t/d cannot overflow because the magnitude of the
+    // entries forming d are not too small compared to the ones forming t.
+    RealScalar u = t / d;
+    RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
+    rot1.s() = RealScalar(1) / tmp;
+    rot1.c() = u / tmp;
  }
  m.applyOnTheLeft(0,1,rot1);
  j_right->makeJacobi(m,0,1);
-  *j_left  = rot1 * j_right->transpose();
+  *j_left = rot1 * j_right->transpose();
 }

 } // end namespace internal
@@ -742,6 +762,11 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD

  private:
    void allocate(Index rows, Index cols, unsigned int computationOptions);
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }

  protected:
    MatrixUType m_matrixU;
@@ -811,14 +836,17 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, u
  
  if(m_cols>m_rows)   m_qr_precond_morecols.allocate(*this);
  if(m_rows>m_cols)   m_qr_precond_morerows.allocate(*this);
-  if(m_cols!=m_cols)  m_scaledMatrix.resize(rows,cols);
+  if(m_rows!=m_cols)  m_scaledMatrix.resize(rows,cols);
 }

 template<typename MatrixType, int QRPreconditioner>
 JacobiSVD<MatrixType, QRPreconditioner>&
 JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsigned int computationOptions)
 {
+  check_template_parameters();
+  
  using std::abs;
+  using std::max;
  allocate(matrix.rows(), matrix.cols(), computationOptions);

  // currently we stop when we reach precision 2*epsilon as the last bit of precision can require an unreasonable number of iterations,
@@ -850,6 +878,7 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
  }

  /*** step 2. The main Jacobi SVD iteration. ***/
+  RealScalar maxDiagEntry = m_workMatrix.cwiseAbs().diagonal().maxCoeff();

  bool finished = false;
  while(!finished)
@@ -865,25 +894,27 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
        // if this 2x2 sub-matrix is not diagonal already...
        // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
        // keep us iterating forever. Similarly, small denormal numbers are considered zero.
-        using std::max;
-        RealScalar threshold = (max)(considerAsZero, precision * (max)(abs(m_workMatrix.coeff(p,p)),
-                                                                       abs(m_workMatrix.coeff(q,q))));
-        // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
+        RealScalar threshold = max EIGEN_EMPTY (considerAsZero, precision * maxDiagEntry);
        if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
        {
          finished = false;
-
          // perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal
-          internal::svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner>::run(m_workMatrix, *this, p, q);
-          JacobiRotation<RealScalar> j_left, j_right;
-          internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
+          // the complex to real operation returns true is the updated 2x2 block is not already diagonal
+          if(internal::svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner>::run(m_workMatrix, *this, p, q, maxDiagEntry))
+          {
+            JacobiRotation<RealScalar> j_left, j_right;
+            internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);

-          // accumulate resulting Jacobi rotations
-          m_workMatrix.applyOnTheLeft(p,q,j_left);
-          if(computeU()) m_matrixU.applyOnTheRight(p,q,j_left.transpose());
+            // accumulate resulting Jacobi rotations
+            m_workMatrix.applyOnTheLeft(p,q,j_left);
+            if(computeU()) m_matrixU.applyOnTheRight(p,q,j_left.transpose());

-          m_workMatrix.applyOnTheRight(p,q,j_right);
-          if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);
+            m_workMatrix.applyOnTheRight(p,q,j_right);
+            if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);
+
+            // keep track of the largest diagonal coefficient
+            maxDiagEntry = max EIGEN_EMPTY (maxDiagEntry,max EIGEN_EMPTY (abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));
+          }
        }
      }
    }
--- a/Eigen/src/SVD/JacobiSVD_MKL.h
+++ b/Eigen/src/SVD/JacobiSVD_MKL.h
@@ -45,8 +45,8 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv
 JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPivHouseholderQRPreconditioner>::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) \
 { \
  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \
-  typedef MatrixType::Scalar Scalar; \
-  typedef MatrixType::RealScalar RealScalar; \
+  /*typedef MatrixType::Scalar Scalar;*/ \
+  /*typedef MatrixType::RealScalar RealScalar;*/ \
  allocate(matrix.rows(), matrix.cols(), computationOptions); \
 \
  /*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \
--- a/Show More
+++ b/Show More