bump to 3.3-beta1

bug #1120 : Make sure that SuperLU version is checked
Update doc to make it clear that only SuperLU 4.x is supported
2026-04-10 11:34:33 +08:00 · 2015-12-16 21:48:48 +01:00 · 2015-12-16 11:37:16 +01:00 · 2015-12-16 10:47:03 +01:00 · 2015-12-16 10:14:24 +01:00 · 2015-12-15 11:34:52 +01:00
312 changed files with 10726 additions and 4258 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(Eigen)

-cmake_minimum_required(VERSION 2.8.4)
+cmake_minimum_required(VERSION 2.8.5)

 # guard against in-source builds

@@ -55,6 +55,7 @@ endif(EIGEN_HG_CHANGESET)


 include(CheckCXXCompilerFlag)
+include(GNUInstallDirs)

 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

@@ -118,11 +119,7 @@ endmacro(ei_add_cxx_compiler_flag)

 if(NOT MSVC)
  # We assume that other compilers are partly compatible with GNUCC
-  
-#  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
-  set(CMAKE_CXX_FLAGS_DEBUG "-g3")
-  set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
-  
+
  # clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag
  # adding -Werror turns such warnings into errors
  check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR)
@@ -341,24 +338,29 @@ option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tens

 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})

-# the user modifiable install path for header files
-set(EIGEN_INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "The directory where we install the header files (optional)")
-
-# set the internal install path for header files which depends on wether the user modifiable
-# EIGEN_INCLUDE_INSTALL_DIR has been set by the user or not.
+# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
 if(EIGEN_INCLUDE_INSTALL_DIR)
-  set(INCLUDE_INSTALL_DIR
-    ${EIGEN_INCLUDE_INSTALL_DIR}
-    CACHE INTERNAL
-    "The directory where we install the header files (internal)"
-  )
+  message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. Use INCLUDE_INSTALL_DIR instead.")
+endif()
+
+if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR)
+  set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR}
+      CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed")
 else()
  set(INCLUDE_INSTALL_DIR
-    "${CMAKE_INSTALL_PREFIX}/include/eigen3"
-    CACHE INTERNAL
-    "The directory where we install the header files (internal)"
-  )
+      "${CMAKE_INSTALL_INCLUDEDIR}/eigen3"
+      CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed"
+      )
 endif()
+set(CMAKEPACKAGE_INSTALL_DIR
+    "${CMAKE_INSTALL_LIBDIR}/cmake/eigen3"
+    CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed"
+    )
+set(PKGCONFIG_INSTALL_DIR
+    "${CMAKE_INSTALL_DATADIR}/pkgconfig"
+    CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed"
+    )
+

 # similar to set_target_properties but append the property instead of overwriting it
 macro(ei_add_target_property target prop value)
@@ -377,21 +379,9 @@ install(FILES
  )

 if(EIGEN_BUILD_PKGCONFIG)
-    SET(path_separator ":")
-    STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
-    message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
-    FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
-    if(pkg_config_libdir)
-        SET(pkg_config_install_dir ${pkg_config_libdir})
-        message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
-    else(pkg_config_libdir)
-        SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
-        message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
-    endif(pkg_config_libdir)
-
-    configure_file(eigen3.pc.in eigen3.pc)
+    configure_file(eigen3.pc.in eigen3.pc @ONLY)
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
-        DESTINATION ${pkg_config_install_dir}/pkgconfig
+        DESTINATION ${PKGCONFIG_INSTALL_DIR}
        )
 endif(EIGEN_BUILD_PKGCONFIG)

@@ -454,12 +444,15 @@ if(cmake_generator_tolower MATCHES "makefile")
  message(STATUS "--------------+--------------------------------------------------------------")
  message(STATUS "Command       |   Description")
  message(STATUS "--------------+--------------------------------------------------------------")
-  message(STATUS "make install  | Install to ${CMAKE_INSTALL_PREFIX}. To change that:")
-  message(STATUS "              |     cmake . -DCMAKE_INSTALL_PREFIX=yourpath")
-  message(STATUS "              |   Eigen headers will then be installed to:")
-  message(STATUS "              |     ${INCLUDE_INSTALL_DIR}")
-  message(STATUS "              |   To install Eigen headers to a separate location, do:")
-  message(STATUS "              |     cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath")
+  message(STATUS "make install  | Install Eigen. Headers will be installed to:")
+  message(STATUS "              |     <CMAKE_INSTALL_PREFIX>/<INCLUDE_INSTALL_DIR>")
+  message(STATUS "              |   Using the following values:")
+  message(STATUS "              |     CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
+  message(STATUS "              |     INCLUDE_INSTALL_DIR:  ${INCLUDE_INSTALL_DIR}")
+  message(STATUS "              |   Change the install location of Eigen headers using:")
+  message(STATUS "              |     cmake . -DCMAKE_INSTALL_PREFIX=yourprefix")
+  message(STATUS "              |   Or:")
+  message(STATUS "              |     cmake . -DINCLUDE_INSTALL_DIR=yourdir")
  message(STATUS "make doc      | Generate the API documentation, requires Doxygen & LaTeX")
  message(STATUS "make check    | Build and run the unit-tests. Read this page:")
  message(STATUS "              |   http://eigen.tuxfamily.org/index.php?title=Tests")
@@ -473,21 +466,13 @@ endif()

 message(STATUS "")

-set ( EIGEN_CONFIG_CMAKE_PATH
-      lib${LIB_SUFFIX}/cmake/eigen3
-      CACHE PATH "The directory where the CMake files are installed"
-    )
-if ( NOT IS_ABSOLUTE EIGEN_CONFIG_CMAKE_PATH )
-  set ( EIGEN_CONFIG_CMAKE_PATH ${CMAKE_INSTALL_PREFIX}/${EIGEN_CONFIG_CMAKE_PATH} )
-endif ()

-set ( EIGEN_USE_FILE ${EIGEN_CONFIG_CMAKE_PATH}/UseEigen3.cmake )
 set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
 set ( EIGEN_VERSION_MAJOR  ${EIGEN_WORLD_VERSION} )
 set ( EIGEN_VERSION_MINOR  ${EIGEN_MAJOR_VERSION} )
 set ( EIGEN_VERSION_PATCH  ${EIGEN_MINOR_VERSION} )
 set ( EIGEN_DEFINITIONS "")
-set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} )
+set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" )
 set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
 set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )

@@ -498,7 +483,7 @@ configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in

 install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
                ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
-          DESTINATION ${EIGEN_CONFIG_CMAKE_PATH}
+          DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}
        )

 # Add uninstall target
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_CHOLESKY_MODULE_H
 #define EIGEN_CHOLESKY_MODULE_H

--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
 #define EIGEN_CHOLMODSUPPORT_MODULE_H

--- a/Eigen/Core
+++ b/Eigen/Core
@@ -300,6 +300,7 @@ using std::ptrdiff_t;

 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
+#include "src/Core/SpecialFunctions.h"
 #include "src/Core/GenericPacketMath.h"

 #if defined EIGEN_VECTORIZE_AVX
@@ -382,8 +383,6 @@ using std::ptrdiff_t;
 #include "src/Core/DiagonalMatrix.h"
 #include "src/Core/Diagonal.h"
 #include "src/Core/DiagonalProduct.h"
-#include "src/Core/PermutationMatrix.h"
-#include "src/Core/Transpositions.h"
 #include "src/Core/Redux.h"
 #include "src/Core/Visitor.h"
 #include "src/Core/Fuzzy.h"
@@ -393,6 +392,9 @@ using std::ptrdiff_t;
 #include "src/Core/GeneralProduct.h"
 #include "src/Core/Solve.h"
 #include "src/Core/Inverse.h"
+#include "src/Core/SolverBase.h"
+#include "src/Core/PermutationMatrix.h"
+#include "src/Core/Transpositions.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
 #include "src/Core/products/GeneralBlockPanelKernel.h"
--- a/Eigen/Eigenvalues
+++ b/Eigen/Eigenvalues
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_EIGENVALUES_MODULE_H
 #define EIGEN_EIGENVALUES_MODULE_H

--- a/Eigen/Geometry
+++ b/Eigen/Geometry
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_GEOMETRY_MODULE_H
 #define EIGEN_GEOMETRY_MODULE_H

--- a/Eigen/Householder
+++ b/Eigen/Householder
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_HOUSEHOLDER_MODULE_H
 #define EIGEN_HOUSEHOLDER_MODULE_H

--- a/Eigen/IterativeLinearSolvers
+++ b/Eigen/IterativeLinearSolvers
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
 #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H

@@ -34,6 +41,7 @@
 #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h"
 #include "src/IterativeLinearSolvers/BiCGSTAB.h"
 #include "src/IterativeLinearSolvers/IncompleteLUT.h"
+#include "src/IterativeLinearSolvers/IncompleteCholesky.h"

 #include "src/Core/util/ReenableStupidWarnings.h"

--- a/Eigen/Jacobi
+++ b/Eigen/Jacobi
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_JACOBI_MODULE_H
 #define EIGEN_JACOBI_MODULE_H

--- a/Eigen/LU
+++ b/Eigen/LU
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_LU_MODULE_H
 #define EIGEN_LU_MODULE_H

--- a/Eigen/MetisSupport
+++ b/Eigen/MetisSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_METISSUPPORT_MODULE_H
 #define EIGEN_METISSUPPORT_MODULE_H

--- a/Eigen/OrderingMethods
+++ b/Eigen/OrderingMethods
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_ORDERINGMETHODS_MODULE_H
 #define EIGEN_ORDERINGMETHODS_MODULE_H

--- a/Eigen/PaStiXSupport
+++ b/Eigen/PaStiXSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_PASTIXSUPPORT_MODULE_H
 #define EIGEN_PASTIXSUPPORT_MODULE_H

--- a/Eigen/PardisoSupport
+++ b/Eigen/PardisoSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_PARDISOSUPPORT_MODULE_H
 #define EIGEN_PARDISOSUPPORT_MODULE_H

@@ -7,8 +14,6 @@

 #include <mkl_pardiso.h>

-#include <unsupported/Eigen/SparseExtra>
-
 /** \ingroup Support_modules
  * \defgroup PardisoSupport_Module PardisoSupport module
  *
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_QR_MODULE_H
 #define EIGEN_QR_MODULE_H

--- a/Eigen/QtAlignedMalloc
+++ b/Eigen/QtAlignedMalloc
@@ -1,3 +1,9 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

 #ifndef EIGEN_QTMALLOC_MODULE_H
 #define EIGEN_QTMALLOC_MODULE_H
--- a/Eigen/SPQRSupport
+++ b/Eigen/SPQRSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPQRSUPPORT_MODULE_H
 #define EIGEN_SPQRSUPPORT_MODULE_H

--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SVD_MODULE_H
 #define EIGEN_SVD_MODULE_H

--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPARSE_MODULE_H
 #define EIGEN_SPARSE_MODULE_H

--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPARSECORE_MODULE_H
 #define EIGEN_SPARSECORE_MODULE_H

@@ -14,7 +21,7 @@
 /** 
  * \defgroup SparseCore_Module SparseCore module
  *
-  * This module provides a sparse matrix representation, and basic associatd matrix manipulations
+  * This module provides a sparse matrix representation, and basic associated matrix manipulations
  * and operations.
  *
  * See the \ref TutorialSparse "Sparse tutorial"
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SPARSEQR_MODULE_H
 #define EIGEN_SPARSEQR_MODULE_H

--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
 #define EIGEN_SUPERLUSUPPORT_MODULE_H

@@ -36,6 +43,8 @@ namespace Eigen { struct SluMatrix; }
  * - class SuperLU: a supernodal sequential LU factorization.
  * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods).
  *
+  * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported.
+  *
  * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
  *
  * \code
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -1,3 +1,10 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
 #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
 #define EIGEN_UMFPACKSUPPORT_MODULE_H

--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -99,14 +99,15 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * This calculates the decomposition for the input \a matrix.
      * \sa LDLT(Index size)
      */
-    explicit LDLT(const MatrixType& matrix)
+    template<typename InputType>
+    explicit LDLT(const EigenBase<InputType>& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
        m_transpositions(matrix.rows()),
        m_temporary(matrix.rows()),
        m_sign(internal::ZeroSign),
        m_isInitialized(false)
    {
-      compute(matrix);
+      compute(matrix.derived());
    }

    /** Clear any existing decomposition
@@ -188,7 +189,8 @@ template<typename _MatrixType, int _UpLo> class LDLT
    template<typename Derived>
    bool solveInPlace(MatrixBase<Derived> &bAndX) const;

-    LDLT& compute(const MatrixType& matrix);
+    template<typename InputType>
+    LDLT& compute(const EigenBase<InputType>& matrix);

    template <typename Derived>
    LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
@@ -427,14 +429,15 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
 /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix
  */
 template<typename MatrixType, int _UpLo>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
+template<typename InputType>
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
 {
  check_template_parameters();
  
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();

-  m_matrix = a;
+  m_matrix = a.derived();

  m_transpositions.resize(size);
  m_isInitialized = false;
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -87,11 +87,12 @@ template<typename _MatrixType, int _UpLo> class LLT
    explicit LLT(Index size) : m_matrix(size, size),
                    m_isInitialized(false) {}

-    explicit LLT(const MatrixType& matrix)
+    template<typename InputType>
+    explicit LLT(const EigenBase<InputType>& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
        m_isInitialized(false)
    {
-      compute(matrix);
+      compute(matrix.derived());
    }

    /** \returns a view of the upper triangular matrix U */
@@ -131,7 +132,8 @@ template<typename _MatrixType, int _UpLo> class LLT
    template<typename Derived>
    void solveInPlace(MatrixBase<Derived> &bAndX) const;

-    LLT& compute(const MatrixType& matrix);
+    template<typename InputType>
+    LLT& compute(const EigenBase<InputType>& matrix);

    /** \returns the LLT decomposition matrix
      *
@@ -283,7 +285,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
        return k;
      mat.coeffRef(k,k) = x = sqrt(x);
      if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
-      if (rs>0) A21 *= RealScalar(1)/x;
+      if (rs>0) A21 /= x;
    }
    return -1;
  }
@@ -381,14 +383,15 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
  * Output: \verbinclude TutorialLinAlgComputeTwice.out
  */
 template<typename MatrixType, int _UpLo>
-LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
+template<typename InputType>
+LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
 {
  check_template_parameters();
  
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
  m_matrix.resize(size, size);
-  m_matrix = a;
+  m_matrix = a.derived();

  m_isInitialized = true;
  bool ok = Traits::inplace_decomposition(m_matrix);
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
  {
    res.itype = CHOLMOD_INT;
  }
-  else if (internal::is_same<_StorageIndex,UF_long>::value)
+  else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
  {
    res.itype = CHOLMOD_LONG;
  }
@@ -170,6 +170,10 @@ class CholmodBase : public SparseSolverBase<Derived>
    typedef typename MatrixType::RealScalar RealScalar;
    typedef MatrixType CholMatrixType;
    typedef typename MatrixType::StorageIndex StorageIndex;
+    enum {
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };

  public:

@@ -350,6 +354,8 @@ class CholmodBase : public SparseSolverBase<Derived>
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+  * \implsparsesolverconcept
+  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
@@ -397,6 +403,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+  * \implsparsesolverconcept
+  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
@@ -442,6 +450,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+  * \implsparsesolverconcept
+  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers
@@ -489,6 +499,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+  * \implsparsesolverconcept
+  *
  * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
  *
  * \sa \ref TutorialSparseDirectSolvers
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -46,15 +46,14 @@ template<typename Derived> class ArrayBase

    typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;

-    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
-
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    typedef DenseBase<Derived> Base;
+    using Base::operator*;
+    using Base::operator/;
    using Base::RowsAtCompileTime;
    using Base::ColsAtCompileTime;
    using Base::SizeAtCompileTime;
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -54,6 +54,7 @@ private:
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
              : int(Dst::MaxRowsAtCompileTime),
+    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
    PacketSize = unpacket_traits<PacketType>::size
  };
@@ -65,7 +66,9 @@ private:
    MightVectorize = StorageOrdersAgree
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && (functor_traits<AssignFunc>::PacketAccess),
-    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+    MayInnerVectorize  = MightVectorize
+                       && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0
                       && int(JointAlignment)>=int(RequiredAlignment),
    MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
@@ -95,10 +98,8 @@ private:
  enum {
    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
-                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
                       && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
    MayUnrollInner      = int(InnerSize) != Dynamic
-                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
                       && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
  };

@@ -125,8 +126,8 @@ public:
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
-    EIGEN_DEBUG_VAR(DstFlags)
-    EIGEN_DEBUG_VAR(SrcFlags)
+    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
+    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
@@ -141,11 +142,11 @@ public:
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
-    EIGEN_DEBUG_VAR(Traversal)
+    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
-    EIGEN_DEBUG_VAR(Unrolling)
+    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
 #endif
@@ -288,7 +289,7 @@ struct dense_assignment_loop;
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
 {
-  EIGEN_DEVICE_FUNC static void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
@@ -311,7 +312,6 @@ struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
 {
-  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -392,7 +392,6 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
 {
-  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -414,7 +413,7 @@ template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
 {
  typedef typename Kernel::PacketType PacketType;
-  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
@@ -438,7 +437,6 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
 {
-  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
@@ -455,7 +453,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
 template<typename Kernel>
 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
 {
-  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
@@ -545,7 +543,6 @@ public:
  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
-  typedef typename DstEvaluatorType::StorageIndex StorageIndex;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;
  
@@ -565,26 +562,23 @@ public:
  EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
  
-  // TODO get rid of this one:
-  EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; }
-  
  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
  
  /// Assign src(row,col) to dst(row,col) through the assignment functor.
-  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }
  
  /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC void assignCoeff(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }
  
  /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner); 
    Index col = colIndexByOuterInner(outer, inner); 
@@ -593,26 +587,26 @@ public:
  
  
  template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
  }
  
  template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC void assignPacket(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
  }
  
  template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner); 
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }
  
-  EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
@@ -621,7 +615,7 @@ public:
      : inner;
  }

-  EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
@@ -719,14 +713,8 @@ EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& fun
 }

 // by-pass AssumeAliasing
-// FIXME the const version should probably not be needed
 // When there is no aliasing, we require that 'dst' has been properly resized
 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
-{
-  call_assignment_no_alias(dst.expression(), src, func);
-}
-template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
 EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
 {
  call_assignment_no_alias(dst.expression(), src, func);
@@ -737,11 +725,9 @@ template<typename Dst, typename Src, typename Func>
 EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
 {
  enum {
-    NeedToTranspose = (  (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
-                        |   // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
-                                // revert to || as soon as not needed anymore.
-                         (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1))
-                     && int(Dst::SizeAtCompileTime) != 1
+    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
+                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
+                      ) && int(Dst::SizeAtCompileTime) != 1
  };

  Index dstRows = NeedToTranspose ? src.cols() : src.rows();
@@ -756,11 +742,7 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
-
-  // TODO this line is commented to allow matrix = permutation
-  // Actually, the "Scalar" type for a permutation matrix does not really make sense,
-  // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow micing void with anything...?
-//   EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
+  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
  
  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
 }
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -83,8 +83,6 @@ inline bool DenseBase<Derived>::all() const
  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && Evaluator::CoeffReadCost != Dynamic
-          && NumTraits<Scalar>::AddCost != Dynamic
          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
@@ -109,8 +107,6 @@ inline bool DenseBase<Derived>::any() const
  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && Evaluator::CoeffReadCost != Dynamic
-          && NumTraits<Scalar>::AddCost != Dynamic
          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
@@ -142,7 +138,11 @@ inline Eigen::Index DenseBase<Derived>::count() const
 template<typename Derived>
 inline bool DenseBase<Derived>::hasNaN() const
 {
+#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
+  return derived().array().isNaN().any();
+#else
  return !((derived().array()==derived().array()).all());
+#endif
 }

 /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
@@ -152,7 +152,11 @@ inline bool DenseBase<Derived>::hasNaN() const
 template<typename Derived>
 inline bool DenseBase<Derived>::allFinite() const
 {
+#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
+  return derived().array().isFinite().all();
+#else
  return !((derived()-derived()).hasNaN());
+#endif
 }
    
 } // end namespace Eigen
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -106,7 +106,7 @@ struct CommaInitializer
  EIGEN_DEVICE_FUNC
  inline ~CommaInitializer()
 #if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
-  throw(Eigen::eigen_assert_exception)
+  EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
 #endif
  {
    eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -29,6 +29,7 @@ struct storage_kind_to_evaluator_kind {
 template<typename StorageKind> struct storage_kind_to_shape;

 template<> struct storage_kind_to_shape<Dense>                  { typedef DenseShape Shape;           };
+template<> struct storage_kind_to_shape<SolverStorage>          { typedef SolverShape Shape;           };
 template<> struct storage_kind_to_shape<PermutationStorage>     { typedef PermutationShape Shape;     };
 template<> struct storage_kind_to_shape<TranspositionsStorage>  { typedef TranspositionsShape Shape;  };

@@ -98,9 +99,6 @@ struct evaluator<const T>
 template<typename ExpressionType>
 struct evaluator_base : public noncopyable
 {
-  // FIXME is it really usefull?
-  typedef typename traits<ExpressionType>::StorageIndex StorageIndex;
-  
  // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
  typedef traits<ExpressionType> ExpressionTraits;
  
@@ -140,11 +138,15 @@ struct evaluator<PlainObjectBase<Derived> >
      m_outerStride(IsVectorAtCompileTime  ? 0 
                                           : int(IsRowMajor) ? ColsAtCompileTime 
                                           : RowsAtCompileTime)
-  {}
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
  
  EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
    : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) 
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
  {
@@ -324,13 +326,15 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
          &  (  HereditaryBits
              | (functor_has_linear_access<NullaryOp>::ret  ? LinearAccessBit : 0)
              | (functor_traits<NullaryOp>::PacketAccess    ? PacketAccessBit : 0)))
-          | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore
-    Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax??
+          | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
+    Alignment = AlignedMax
  };

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
    : m_functor(n.functor()) 
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;

@@ -379,7 +383,10 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
    : m_functor(op.functor()), 
      m_argImpl(op.nestedExpression()) 
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;

@@ -452,7 +459,10 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
    : m_functor(xpr.functor()),
      m_lhsImpl(xpr.lhs()), 
      m_rhsImpl(xpr.rhs())  
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;

@@ -505,7 +515,10 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
    : m_unaryOp(op.functor()), 
      m_argImpl(op.nestedExpression()) 
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -562,6 +575,7 @@ struct mapbase_evaluator : evaluator_base<Derived>
  {
    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
                        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
 
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
@@ -636,17 +650,9 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
    HasNoStride = HasNoInnerStride && HasNoOuterStride,
    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
    
-    PacketAlignment = unpacket_traits<PacketScalar>::alignment,
-    
-    KeepsPacketAccess = bool(HasNoInnerStride)
-                        && ( bool(IsDynamicSize)
-                           || HasNoOuterStride
-                           || ( OuterStrideAtCompileTime!=Dynamic
-                           && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ),
-    Flags0 = evaluator<PlainObjectType>::Flags,
-    Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
-           ? int(Flags0) : int(Flags0 & ~LinearAccessBit),
-    Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit),
+    PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),
+    LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit),
+    Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask),
    
    Alignment = int(MapOptions)&int(AlignedMask)
  };
@@ -724,7 +730,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
  };
  typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {}
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block)
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
 };

 // no direct-access => dispatch to a unary evaluator
@@ -825,14 +834,14 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) 
  {
-    // FIXME this should be an internal assertion
+    // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
    eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
  }
 };


 // -------------------- Select --------------------
-// TODO shall we introduce a ternary_evaluator?
+// NOTE shall we introduce a ternary_evaluator?

 // TODO enable vectorization for Select
 template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
@@ -842,8 +851,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
  typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
  enum {
    CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
-                  + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
-                                   evaluator<ElseMatrixType>::CoeffReadCost),
+                  + EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
+                                         evaluator<ElseMatrixType>::CoeffReadCost),

    Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
    
@@ -854,7 +863,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
    : m_conditionImpl(select.conditionMatrix()),
      m_thenImpl(select.thenMatrix()),
      m_elseImpl(select.elseMatrix())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
 
  typedef typename XprType::CoeffReturnType CoeffReturnType;

@@ -897,8 +908,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
  
  enum {
    CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
-    
-    Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
+    LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
+    Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
    
    Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
  };
@@ -957,7 +968,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
  }
 
 protected:
-  const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? (we have the same situation in evaluator_product)
+  const ArgTypeNested m_arg;
  evaluator<ArgTypeNestedCleaned> m_argImpl;
  const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
  const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
@@ -965,48 +976,57 @@ protected:


 // -------------------- PartialReduxExpr --------------------
-//
-// This is a wrapper around the expression object. 
-// TODO: Find out how to write a proper evaluator without duplicating
-//       the row() and col() member functions.

 template< typename ArgType, typename MemberOp, int Direction>
 struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
 {
  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
-  typedef typename XprType::Scalar InputScalar;
+  typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
+  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+  typedef typename ArgType::Scalar InputScalar;
+  typedef typename XprType::Scalar Scalar;
  enum {
-    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) :  int(XprType::ColsAtCompileTime)
+    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) :  int(ArgType::ColsAtCompileTime)
  };
  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
  enum {
-    CoeffReadCost = TraversalSize==Dynamic ? Dynamic
+    CoeffReadCost = TraversalSize==Dynamic ? HugeCost
                  : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
    
-    Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits),
+    Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))),
    
-    Alignment = 0 // FIXME this could be improved
+    Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
  };

-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr)
-    : m_expr(expr)
-  {}
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
+    : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value));
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;
- 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
-  { 
-    return m_expr.coeff(row, col);
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
+  {
+    if (Direction==Vertical)
+      return m_functor(m_arg.col(j));
+    else
+      return m_functor(m_arg.row(i));
  }
-  
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
-  { 
-    return m_expr.coeff(index);
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+  {
+    if (Direction==Vertical)
+      return m_functor(m_arg.col(index));
+    else
+      return m_functor(m_arg.row(index));
  }

 protected:
-  const XprType m_expr;
+  const ArgTypeNested m_arg;
+  const MemberOp m_functor;
 };


@@ -1130,6 +1150,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
    // FIXME enable DirectAccess with negative strides?
    Flags0 = evaluator<ArgType>::Flags,
    LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
+                  || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
                 ? LinearAccessBit : 0,

    Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
@@ -1139,8 +1160,8 @@ struct unary_evaluator<Reverse<ArgType, Direction> >

  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
    : m_argImpl(reverse.nestedExpression()),
-      m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0),
-      m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0)
+      m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),
+      m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
  { }
 
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
@@ -1214,8 +1235,9 @@ protected:
  evaluator<ArgType> m_argImpl;

  // If we do not reverse rows, then we do not need to know the number of rows; same for columns
-  const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 0> m_rows;
-  const variable_if_dynamic<Index, ReverseCol ? ArgType::ColsAtCompileTime : 0> m_cols;
+  // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors.
+  const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 1> m_rows;
+  const variable_if_dynamic<Index, ReverseCol ? ArgType::ColsAtCompileTime : 1> m_cols;
 };


@@ -1331,20 +1353,16 @@ struct evaluator<EvalToTemp<ArgType> >
  typedef evaluator<PlainObject> Base;
  
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
-    : m_result(xpr.rows(), xpr.cols())
+    : m_result(xpr.arg())
  {
    ::new (static_cast<Base*>(this)) Base(m_result);
-    // TODO we should simply do m_result(xpr.arg());
-    call_dense_assignment_loop(m_result, xpr.arg());
  }

  // This constructor is used when nesting an EvalTo evaluator in another evaluator
  EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
-    : m_result(arg.rows(), arg.cols())
+    : m_result(arg)
  {
    ::new (static_cast<Base*>(this)) Base(m_result);
-    // TODO we should simply do m_result(xpr.arg());
-    call_dense_assignment_loop(m_result, arg);
  }

 protected:
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -40,18 +40,14 @@ static inline void check_DenseIndex_is_signed() {
  */
 template<typename Derived> class DenseBase
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-  : public internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                                     typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>
+  : public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar,
+                                            typename NumTraits<typename internal::traits<Derived>::Scalar>::Real,
+                                            DenseCoeffsBase<Derived> >
 #else
  : public DenseCoeffsBase<Derived>
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 {
  public:
-    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
-    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator/;
-

    /** Inner iterator type to iterate over the coefficients of a row or column.
      * \sa class InnerIterator
@@ -77,9 +73,10 @@ template<typename Derived> class DenseBase
    typedef Scalar value_type;
    
    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base;

-    typedef internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                      typename NumTraits<typename internal::traits<Derived>::Scalar>::Real> Base;
+    using Base::operator*;
+    using Base::operator/;
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@@ -215,10 +212,6 @@ template<typename Derived> class DenseBase
      * of stored coefficients. */
    EIGEN_DEVICE_FUNC
    inline Index nonZeros() const { return size(); }
-    /** \returns true if either the number of rows or the number of columns is equal to 1.
-      * In other words, this function returns
-      * \code rows()==1 || cols()==1 \endcode
-      * \sa rows(), cols(), IsVectorAtCompileTime. */

    /** \returns the outer size.
      *
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -138,6 +138,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    EIGEN_STRONG_INLINE CoeffReturnType
    coeff(Index index) const
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
      return internal::evaluator<Derived>(derived()).coeff(index);
    }
@@ -243,6 +245,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
      eigen_internal_assert(index >= 0 && index < size());
      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
@@ -370,6 +374,8 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
    EIGEN_STRONG_INLINE Scalar&
    coeffRef(Index index)
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
      return internal::evaluator<Derived>(derived()).coeffRef(index);
    }
@@ -617,7 +623,7 @@ static inline Index first_default_aligned(const DenseBase<Derived>& m)
 {
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
-  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(m);
+  return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
 }

 template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -178,9 +178,11 @@ struct lpNorm_selector<Derived, Infinity>

 } // end namespace internal

-/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
-  *          of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
-  *          norm, that is the maximum of the absolute values of the coefficients of *this.
+/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
+  *          of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
+  *          norm, that is the maximum of the absolute values of the coefficients of \c *this.
+  *
+  * \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
  *
  * \sa norm()
  */
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -160,7 +160,7 @@ template<>              struct product_type_selector<Large,Large,Small>  { enum
 namespace internal {

 template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_dense_sense_selector;
+struct gemv_dense_selector;

 } // end namespace internal

@@ -204,19 +204,19 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>

 // The vector is on the left => transposition
 template<int StorageOrder, bool BlasCompatible>
-struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
 {
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    Transpose<Dest> destT(dest);
    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    gemv_dense_sense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+    gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
  }
 };

-template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
 {
  template<typename Lhs, typename Rhs, typename Dest>
  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
@@ -292,7 +292,7 @@ template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
  }
 };

-template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,true>
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
 {
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
@@ -345,27 +345,28 @@ template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,true>
  }
 };

-template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,false>
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
 {
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
+    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
+    typename nested_eval<Rhs,1>::type actual_rhs(rhs);
    const Index size = rhs.rows();
    for(Index k=0; k<size; ++k)
-      dest += (alpha*rhs.coeff(k)) * lhs.col(k);
+      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
  }
 };

-template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,false>
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
 {
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
+    typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
    const Index rows = dest.rows();
    for(Index i=0; i<rows; ++i)
-      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(rhs.transpose())).sum();
+      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
  }
 };

--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -43,7 +43,7 @@ struct default_packet_traits
 {
  enum {
    HasHalfPacket = 0,
-    
+
    HasAdd    = 1,
    HasSub    = 1,
    HasMul    = 1,
@@ -74,10 +74,15 @@ struct default_packet_traits
    HasSinh    = 0,
    HasCosh    = 0,
    HasTanh    = 0,
+    HasLGamma = 0,
+    HasErf = 0,
+    HasErfc = 0,

    HasRound  = 0,
    HasFloor  = 0,
-    HasCeil   = 0
+    HasCeil   = 0,
+
+    HasSign   = 0
  };
 };

@@ -430,6 +435,18 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }

+/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
+
+/** \internal \returns the erf(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perf(const Packet& a) { using numext::erf; return erf(a); }
+
+/** \internal \returns the erfc(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
+
 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -49,6 +49,9 @@ namespace Eigen
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
@@ -64,6 +67,7 @@ namespace Eigen
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op)
  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op)
  
  template<typename Derived>
  inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
--- a/Eigen/src/Core/Inverse.h
+++ b/Eigen/src/Core/Inverse.h
@@ -12,8 +12,6 @@

 namespace Eigen { 

-// TODO move the general declaration in Core, and rename this file DenseInverseImpl.h, or something like this...
-
 template<typename XprType,typename StorageKind> class InverseImpl;

 namespace internal {
@@ -49,11 +47,13 @@ public:
  typedef typename XprType::PlainObject                       PlainObject;
  typedef typename internal::ref_selector<XprType>::type      XprTypeNested;
  typedef typename internal::remove_all<XprTypeNested>::type  XprTypeNestedCleaned;
+  typedef typename internal::ref_selector<Inverse>::type Nested;
+  typedef typename internal::remove_all<XprType>::type NestedExpression;
  
  explicit Inverse(const XprType &xpr)
    : m_xpr(xpr)
  {}
-  
+
  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }

@@ -63,25 +63,16 @@ protected:
  XprTypeNested m_xpr;
 };

-/** \internal
-  * Specialization of the Inverse expression for dense expressions.
-  * Direct access to the coefficients are discared.
-  * FIXME this intermediate class is probably not needed anymore.
-  */
-template<typename XprType>
-class InverseImpl<XprType,Dense>
-  : public MatrixBase<Inverse<XprType> >
+// Generic API dispatcher
+template<typename XprType, typename StorageKind>
+class InverseImpl
+  : public internal::generic_xpr_base<Inverse<XprType> >::type
 {
-  typedef Inverse<XprType> Derived;
-  
 public:
-  
-  typedef MatrixBase<Derived> Base;
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-  typedef typename internal::remove_all<XprType>::type NestedExpression;
-
+  typedef typename internal::generic_xpr_base<Inverse<XprType> >::type Base;
+  typedef typename XprType::Scalar Scalar;
 private:
-  
+
  Scalar coeff(Index row, Index col) const;
  Scalar coeff(Index i) const;
 };
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -155,6 +155,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      checkSanity();
    }

+    #ifdef EIGEN_MAPBASE_PLUGIN
+    #include EIGEN_MAPBASE_PLUGIN
+    #endif
+
  protected:

    EIGEN_DEVICE_FUNC
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -241,8 +241,8 @@ struct conj_retval
 * Implementation of abs2                                                 *
 ****************************************************************************/

-template<typename Scalar>
-struct abs2_impl
+template<typename Scalar,bool IsComplex>
+struct abs2_impl_default
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  EIGEN_DEVICE_FUNC
@@ -252,16 +252,28 @@ struct abs2_impl
  }
 };

-template<typename RealScalar>
-struct abs2_impl<std::complex<RealScalar> >
+template<typename Scalar>
+struct abs2_impl_default<Scalar, true> // IsComplex
 {
+  typedef typename NumTraits<Scalar>::Real RealScalar;
  EIGEN_DEVICE_FUNC
-  static inline RealScalar run(const std::complex<RealScalar>& x)
+  static inline RealScalar run(const Scalar& x)
  {
    return real(x)*real(x) + imag(x)*imag(x);
  }
 };

+template<typename Scalar>
+struct abs2_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);
+  }
+};
+
 template<typename Scalar>
 struct abs2_retval
 {
@@ -314,8 +326,6 @@ struct hypot_impl
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x, const Scalar& y)
  {
-    EIGEN_USING_STD_MATH(max);
-    EIGEN_USING_STD_MATH(min);
    EIGEN_USING_STD_MATH(abs);
    EIGEN_USING_STD_MATH(sqrt);
    RealScalar _x = abs(x);
@@ -607,8 +617,6 @@ struct random_default_impl<Scalar, false, true>
 {
  static inline Scalar run(const Scalar& x, const Scalar& y)
  { 
-    using std::max;
-    using std::min;
    typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
    if(y<x)
      return x;
@@ -667,6 +675,115 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
 }

+// Implementatin of is* functions
+
+// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang.
+#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)
+#define EIGEN_USE_STD_FPCLASSIFY 1
+#else
+#define EIGEN_USE_STD_FPCLASSIFY 0
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isnan_impl(const T&) { return false; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isinf_impl(const T&) { return false; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isfinite_impl(const T&) { return true; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isfinite_impl(const T& x)
+{
+  #if EIGEN_USE_STD_FPCLASSIFY
+    using std::isfinite;
+    return isfinite EIGEN_NOT_A_MACRO (x);
+  #else
+    return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
+  #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isinf_impl(const T& x)
+{
+  #if EIGEN_USE_STD_FPCLASSIFY
+    using std::isinf;
+    return isinf EIGEN_NOT_A_MACRO (x);
+  #else
+    return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
+  #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isnan_impl(const T& x)
+{
+  #if EIGEN_USE_STD_FPCLASSIFY
+    using std::isnan;
+    return isnan EIGEN_NOT_A_MACRO (x);
+  #else
+    return x != x;
+  #endif
+}
+
+#if (!EIGEN_USE_STD_FPCLASSIFY)
+
+#if EIGEN_COMP_MSVC
+
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
+{
+  return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF;
+}
+
+//MSVC defines a _isnan builtin function, but for double only
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x)      { return _isnan(x); }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x)       { return _isnan(x); }
+
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x)      { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x)       { return isinf_msvc_helper(x); }
+
+#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)
+
+#if EIGEN_GNUC_AT_LEAST(5,0)
+  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only")))
+#else
+  // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol),
+  //      while the second prevent too aggressive optimizations in fast-math mode:
+  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
+#endif
+
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x)      { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x)       { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x)      { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x)       { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
+
+#undef EIGEN_TMP_NOOPT_ATTRIB
+
+#endif
+
+#endif
+
+// The following overload are defined at the end of this file
+template<typename T> bool isfinite_impl(const std::complex<T>& x);
+template<typename T> bool isnan_impl(const std::complex<T>& x);
+template<typename T> bool isinf_impl(const std::complex<T>& x);
+
 } // end namespace internal

 /****************************************************************************
@@ -810,59 +927,9 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
  return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
 }

-template<typename T>
-EIGEN_DEVICE_FUNC
-bool (isfinite)(const T& x)
-{
-  #if EIGEN_HAS_CXX11_MATH
-    using std::isfinite;
-    return isfinite EIGEN_NOT_A_MACRO (x);
-  #else
-    return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
-  #endif
-}
-
-template<typename T>
-EIGEN_DEVICE_FUNC
-bool (isnan)(const T& x)
-{
-  #if EIGEN_HAS_CXX11_MATH
-    using std::isnan;
-    return isnan EIGEN_NOT_A_MACRO (x);
-  #else
-    return x != x;
-  #endif
-}
-
-template<typename T>
-EIGEN_DEVICE_FUNC
-bool (isinf)(const T& x)
-{
-  #if EIGEN_HAS_CXX11_MATH
-    using std::isinf;
-    return isinf EIGEN_NOT_A_MACRO (x);
-  #else
-    return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
-  #endif
-}
-
-template<typename T>
-bool (isfinite)(const std::complex<T>& x)
-{
-  return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
-}
-
-template<typename T>
-bool (isnan)(const std::complex<T>& x)
-{
-  return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
-}
-
-template<typename T>
-bool (isinf)(const std::complex<T>& x)
-{
-  return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
-}
+template<typename T> EIGEN_DEVICE_FUNC bool (isnan)   (const T &x) { return internal::isnan_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isinf)   (const T &x) { return internal::isinf_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }

 template<typename Scalar>
 EIGEN_DEVICE_FUNC
@@ -906,6 +973,24 @@ inline int log2(int x)

 namespace internal {

+template<typename T>
+bool isfinite_impl(const std::complex<T>& x)
+{
+  return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
+}
+
+template<typename T>
+bool isnan_impl(const std::complex<T>& x)
+{
+  return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
+}
+
+template<typename T>
+bool isinf_impl(const std::complex<T>& x)
+{
+  return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
+}
+
 /****************************************************************************
 * Implementation of fuzzy comparisons                                       *
 ****************************************************************************/
@@ -928,9 +1013,8 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
  EIGEN_DEVICE_FUNC
  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
-    EIGEN_USING_STD_MATH(min);
    EIGEN_USING_STD_MATH(abs);
-    return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
+    return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec;
  }
  EIGEN_DEVICE_FUNC
  static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
@@ -971,8 +1055,7 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
  }
  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
-    EIGEN_USING_STD_MATH(min);
-    return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec;
+    return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
  }
 };

--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -328,23 +328,26 @@ template<typename Derived> class MatrixBase

 /////////// LU module ///////////

-    EIGEN_DEVICE_FUNC const FullPivLU<PlainObject> fullPivLu() const;
-    EIGEN_DEVICE_FUNC const PartialPivLU<PlainObject> partialPivLu() const;
-
-    const PartialPivLU<PlainObject> lu() const;
+    EIGEN_DEVICE_FUNC
+    inline const FullPivLU<PlainObject> fullPivLu() const;
+    EIGEN_DEVICE_FUNC
+    inline const PartialPivLU<PlainObject> partialPivLu() const;

    EIGEN_DEVICE_FUNC
-    const Inverse<Derived> inverse() const;
+    inline const PartialPivLU<PlainObject> lu() const;
+
+    EIGEN_DEVICE_FUNC
+    inline const Inverse<Derived> inverse() const;
    
    template<typename ResultType>
-    void computeInverseAndDetWithCheck(
+    inline void computeInverseAndDetWithCheck(
      ResultType& inverse,
      typename ResultType::Scalar& determinant,
      bool& invertible,
      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
    ) const;
    template<typename ResultType>
-    void computeInverseWithCheck(
+    inline void computeInverseWithCheck(
      ResultType& inverse,
      bool& invertible,
      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
@@ -353,22 +356,24 @@ template<typename Derived> class MatrixBase

 /////////// Cholesky module ///////////

-    const LLT<PlainObject>  llt() const;
-    const LDLT<PlainObject> ldlt() const;
+    inline const LLT<PlainObject>  llt() const;
+    inline const LDLT<PlainObject> ldlt() const;

 /////////// QR module ///////////

-    const HouseholderQR<PlainObject> householderQr() const;
-    const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
-    const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+    inline const HouseholderQR<PlainObject> householderQr() const;
+    inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+    inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;

-    EigenvaluesReturnType eigenvalues() const;
-    RealScalar operatorNorm() const;
+/////////// Eigenvalues module ///////////
+
+    inline EigenvaluesReturnType eigenvalues() const;
+    inline RealScalar operatorNorm() const;

 /////////// SVD module ///////////

-    JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
-    BDCSVD<PlainObject>    bdcSvd(unsigned int computationOptions = 0) const;
+    inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
+    inline BDCSVD<PlainObject>    bdcSvd(unsigned int computationOptions = 0) const;

 /////////// Geometry module ///////////

@@ -381,24 +386,24 @@ template<typename Derived> class MatrixBase
    #endif // EIGEN_PARSED_BY_DOXYGEN
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
-    typename cross_product_return_type<OtherDerived>::type
+    inline typename cross_product_return_type<OtherDerived>::type
    cross(const MatrixBase<OtherDerived>& other) const;
    
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
-    PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+    inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
    
    EIGEN_DEVICE_FUNC
-    PlainObject unitOrthogonal(void) const;
+    inline PlainObject unitOrthogonal(void) const;
    
-    Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
+    inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
    
-    ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
+    inline ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
    // put this as separate enum value to work around possible GCC 4.3 bug (?)
    enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
                                          : ColsAtCompileTime==1 ? Vertical : Horizontal };
    typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
-    HomogeneousReturnType homogeneous() const;
+    inline HomogeneousReturnType homogeneous() const;
    
    enum {
      SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
@@ -409,7 +414,7 @@ template<typename Derived> class MatrixBase
    typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>,
                const ConstStartMinusOne > HNormalizedReturnType;

-    const HNormalizedReturnType hnormalized() const;
+    inline const HNormalizedReturnType hnormalized() const;

 ////////// Householder module ///////////

@@ -433,6 +438,15 @@ template<typename Derived> class MatrixBase
    template<typename OtherScalar>
    void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);

+///////// SparseCore module /////////
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
+    cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
+    {
+      return other.cwiseProduct(derived());
+    }
+
 ///////// MatrixFunctions module /////////

    typedef typename internal::stem_function<Scalar>::type StemFunction;
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -157,9 +157,9 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
    IsInteger = NumTraits<Scalar>::IsInteger,
    IsSigned  = NumTraits<Scalar>::IsSigned,
    RequireInitialization = 1,
-    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
-    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
-    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
+    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
+    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
+    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
  };
  
  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2009-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -13,9 +13,6 @@

 namespace Eigen { 

-// TODO: this does not seems to be needed at all:
-// template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
-
 /** \class PermutationBase
  * \ingroup Core_Module
  *
@@ -67,8 +64,10 @@ class PermutationBase : public EigenBase<Derived>
            DenseMatrixType;
    typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndex>
            PlainPermutationType;
+    typedef PlainPermutationType PlainObject;
    using Base::derived;
-    typedef Transpose<PermutationBase> TransposeReturnType;
+    typedef Inverse<Derived> InverseReturnType;
+    typedef void Scalar;
    #endif

    /** Copies the other permutation into *this */
@@ -195,14 +194,14 @@ class PermutationBase : public EigenBase<Derived>
      *
      * \note \note_try_to_help_rvo
      */
-    inline TransposeReturnType inverse() const
-    { return TransposeReturnType(derived()); }
+    inline InverseReturnType inverse() const
+    { return InverseReturnType(derived()); }
    /** \returns the tranpose permutation matrix.
      *
      * \note \note_try_to_help_rvo
      */
-    inline TransposeReturnType transpose() const
-    { return TransposeReturnType(derived()); }
+    inline InverseReturnType transpose() const
+    { return InverseReturnType(derived()); }

    /**** multiplication helpers to hopefully get RVO ****/

@@ -237,7 +236,7 @@ class PermutationBase : public EigenBase<Derived>
      * \note \note_try_to_help_rvo
      */
    template<typename Other>
-    inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other) const
+    inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const
    { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }

    /** \returns the product of an inverse permutation with another permutation.
@@ -245,7 +244,7 @@ class PermutationBase : public EigenBase<Derived>
      * \note \note_try_to_help_rvo
      */
    template<typename Other> friend
-    inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
+    inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)
    { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
    
    /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
@@ -303,6 +302,7 @@ struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _Storag
  typedef PermutationStorage StorageKind;
  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
  typedef _StorageIndex StorageIndex;
+  typedef void Scalar;
 };
 }

@@ -396,13 +396,13 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile

 #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename Other>
-    PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
-      : m_indices(other.nestedExpression().size())
+    PermutationMatrix(const InverseImpl<Other,PermutationStorage>& other)
+      : m_indices(other.derived().nestedExpression().size())
    {
      eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());
      StorageIndex end = StorageIndex(m_indices.size());
      for (StorageIndex i=0; i<end;++i)
-        m_indices.coeffRef(other.nestedExpression().indices().coeff(i)) = i;
+        m_indices.coeffRef(other.derived().nestedExpression().indices().coeff(i)) = i;
    }
    template<typename Lhs,typename Rhs>
    PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
@@ -426,6 +426,7 @@ struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _St
  typedef PermutationStorage StorageKind;
  typedef Map<const Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
  typedef _StorageIndex StorageIndex;
+  typedef void Scalar;
 };
 }

@@ -499,7 +500,7 @@ template<typename _IndicesType>
 struct traits<PermutationWrapper<_IndicesType> >
 {
  typedef PermutationStorage StorageKind;
-  typedef typename _IndicesType::Scalar Scalar;
+  typedef void Scalar;
  typedef typename _IndicesType::Scalar StorageIndex;
  typedef _IndicesType IndicesType;
  enum {
@@ -561,84 +562,61 @@ operator*(const PermutationBase<PermutationDerived> &permutation,
            (permutation.derived(), matrix.derived());
 }

-namespace internal {

-/* Template partial specialization for transposed/inverse permutations */
-
-template<typename Derived>
-struct traits<Transpose<PermutationBase<Derived> > >
- : traits<Derived>
-{};
-
-} // end namespace internal
-
-// TODO: the specificties should be handled by the evaluator,
-// at the very least we should only specialize TransposeImpl
-template<typename Derived>
-class Transpose<PermutationBase<Derived> >
-  : public EigenBase<Transpose<PermutationBase<Derived> > >
+template<typename PermutationType>
+class InverseImpl<PermutationType, PermutationStorage>
+  : public EigenBase<Inverse<PermutationType> >
 {
-    typedef Derived PermutationType;
-    typedef typename PermutationType::IndicesType IndicesType;
    typedef typename PermutationType::PlainPermutationType PlainPermutationType;
+    typedef internal::traits<PermutationType> PermTraits;
+  protected:
+    InverseImpl() {}
  public:
+    typedef Inverse<PermutationType> InverseType;
+    using EigenBase<Inverse<PermutationType> >::derived;

    #ifndef EIGEN_PARSED_BY_DOXYGEN
-    typedef internal::traits<PermutationType> Traits;
-    typedef typename Derived::DenseMatrixType DenseMatrixType;
+    typedef typename PermutationType::DenseMatrixType DenseMatrixType;
    enum {
-      Flags = Traits::Flags,
-      RowsAtCompileTime = Traits::RowsAtCompileTime,
-      ColsAtCompileTime = Traits::ColsAtCompileTime,
-      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
-      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
+      RowsAtCompileTime = PermTraits::RowsAtCompileTime,
+      ColsAtCompileTime = PermTraits::ColsAtCompileTime,
+      MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime
    };
-    typedef typename Traits::Scalar Scalar;
-    typedef typename Traits::StorageIndex StorageIndex;
    #endif

-    Transpose(const PermutationType& p) : m_permutation(p) {}
-
-    inline Index rows() const { return m_permutation.rows(); }
-    inline Index cols() const { return m_permutation.cols(); }
-
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename DenseDerived>
    void evalTo(MatrixBase<DenseDerived>& other) const
    {
      other.setZero();
-      for (Index i=0; i<rows();++i)
-        other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
+      for (Index i=0; i<derived().rows();++i)
+        other.coeffRef(i, derived().nestedExpression().indices().coeff(i)) = typename DenseDerived::Scalar(1);
    }
    #endif

    /** \return the equivalent permutation matrix */
-    PlainPermutationType eval() const { return *this; }
+    PlainPermutationType eval() const { return derived(); }

-    DenseMatrixType toDenseMatrix() const { return *this; }
+    DenseMatrixType toDenseMatrix() const { return derived(); }

    /** \returns the matrix with the inverse permutation applied to the columns.
      */
    template<typename OtherDerived> friend
-    const Product<OtherDerived, Transpose, AliasFreeProduct>
-    operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
+    const Product<OtherDerived, InverseType, AliasFreeProduct>
+    operator*(const MatrixBase<OtherDerived>& matrix, const InverseType& trPerm)
    {
-      return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trPerm.derived());
+      return Product<OtherDerived, InverseType, AliasFreeProduct>(matrix.derived(), trPerm.derived());
    }

    /** \returns the matrix with the inverse permutation applied to the rows.
      */
    template<typename OtherDerived>
-    const Product<Transpose, OtherDerived, AliasFreeProduct>
+    const Product<InverseType, OtherDerived, AliasFreeProduct>
    operator*(const MatrixBase<OtherDerived>& matrix) const
    {
-      return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
+      return Product<InverseType, OtherDerived, AliasFreeProduct>(derived(), matrix.derived());
    }
-
-    const PermutationType& nestedExpression() const { return m_permutation; }
-
-  protected:
-    const PermutationType& m_permutation;
 };

 template<typename Derived>
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -263,7 +263,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
        m_storage.resize(size, rows, cols);
        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
      #else
-        internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
        m_storage.resize(rows*cols, rows, cols);
      #endif
    }
@@ -450,6 +449,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      return Base::operator=(func);
    }

+    // Prevent user from trying to instantiate PlainObjectBase objects
+    // by making all its constructor protected. See bug 1074.
+  protected:
+
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
    {
@@ -496,17 +499,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 //       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }

-    /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
-      */
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
-    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
-    {
-      _resize_to_match(other);
-      Base::operator=(other.derived());
-      return this->derived();
-    }
-
    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
@@ -520,7 +512,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type

    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
      : m_storage()
    {
@@ -539,6 +531,19 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      other.evalTo(this->derived());
    }

+  public:
+
+    /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
+    {
+      _resize_to_match(other);
+      Base::operator=(other.derived());
+      return this->derived();
+    }
+
    /** \name Map
      * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects,
      * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -217,29 +217,6 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
  
 };

-/***************************************************************************
-* Implementation of matrix base methods
-***************************************************************************/
-
-
-/** \internal used to test the evaluator only
-  */
-template<typename Lhs,typename Rhs>
-const Product<Lhs,Rhs>
-prod(const Lhs& lhs, const Rhs& rhs)
-{
-  return Product<Lhs,Rhs>(lhs,rhs);
-}
-
-/** \internal used to test the evaluator only
-  */
-template<typename Lhs,typename Rhs>
-const Product<Lhs,Rhs,LazyProduct>
-lazyprod(const Lhs& lhs, const Rhs& rhs)
-{
-  return Product<Lhs,Rhs,LazyProduct>(lhs,rhs);
-}
-
 } // end namespace Eigen

 #endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -38,6 +38,12 @@ struct evaluator<Product<Lhs, Rhs, Options> >
 // Catch scalar * ( A * B ) and transform it to (A*scalar) * B
 // TODO we should apply that rule only if that's really helpful
 template<typename Lhs, typename Rhs, typename Scalar>
+struct evaluator_traits<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,  const Product<Lhs, Rhs, DefaultProduct>  > >
+ : evaluator_traits_base<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,  const Product<Lhs, Rhs, DefaultProduct>  > >
+{
+  enum { AssumeAliasing = 1 };
+};
+template<typename Lhs, typename Rhs, typename Scalar>
 struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,  const Product<Lhs, Rhs, DefaultProduct>  > > 
 : public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> >
 {
@@ -91,8 +97,7 @@ struct evaluator_traits<Product<Lhs, Rhs, AliasFreeProduct> >
 // This is the default evaluator implementation for products:
 // It creates a temporary and call generic_product_impl
 template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
-struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar,
-  EnableIf<(Options==DefaultProduct || Options==AliasFreeProduct)> >
+struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
  : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
 {
  typedef Product<Lhs, Rhs, Options> XprType;
@@ -177,11 +182,41 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
                       const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
  {
-    // TODO use operator* instead of prod() once we have made enough progress
-    call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func);
+    call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func);
  }
 };

+//----------------------------------------
+// Catch "Dense ?= xpr + Product<>" expression to save one temporary
+// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
+
+template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
+struct assignment_from_xpr_plus_product
+{
+  typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const Func1& func)
+  {
+    call_assignment_no_alias(dst, src.lhs(), func);
+    call_assignment_no_alias(dst, src.rhs(), Func2());
+  }
+};
+
+template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
+                                           const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense>
+  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> >
+{};
+template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
+                                           const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense>
+  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> >
+{};
+template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
+                                           const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense>
+  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> >
+{};
+//----------------------------------------

 template<typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
@@ -213,12 +248,12 @@ template<typename Dst, typename Lhs, typename Rhs, typename Func>
 EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
 {
  evaluator<Rhs> rhsEval(rhs);
-  // FIXME make sure lhs is sequentially stored
+  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
+  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
-  // FIXME we should probably build an evaluator for dst
  const Index cols = dst.cols();
  for (Index j=0; j<cols; ++j)
-    func(dst.col(j), rhsEval.coeff(0,j) * lhs);
+    func(dst.col(j), rhsEval.coeff(0,j) * actual_lhs);
 }

 // Row major result
@@ -226,18 +261,18 @@ template<typename Dst, typename Lhs, typename Rhs, typename Func>
 EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
 {
  evaluator<Lhs> lhsEval(lhs);
-  // FIXME make sure rhs is sequentially stored
+  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
+  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
-  // FIXME we should probably build an evaluator for dst
  const Index rows = dst.rows();
  for (Index i=0; i<rows; ++i)
-    func(dst.row(i), lhsEval.coeff(i,0) * rhs);
+    func(dst.row(i), lhsEval.coeff(i,0) * actual_rhs);
 }

 template<typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
 {
-  template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
  
  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
@@ -255,25 +290,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
  template<typename Dst>
  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
-    internal::outer_product_selector_run(dst, lhs, rhs, set(), IsRowMajor<Dst>());
+    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
  }
  
  template<typename Dst>
  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
-    internal::outer_product_selector_run(dst, lhs, rhs, add(), IsRowMajor<Dst>());
+    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
  }
  
  template<typename Dst>
  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
-    internal::outer_product_selector_run(dst, lhs, rhs, sub(), IsRowMajor<Dst>());
+    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
  }
  
  template<typename Dst>
  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
-    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), IsRowMajor<Dst>());
+    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
  }
  
 };
@@ -314,7 +349,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
-    internal::gemv_dense_sense_selector<Side,
+    internal::gemv_dense_selector<Side,
                            (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
                            bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
                           >::run(lhs, rhs, dst, alpha);
@@ -329,28 +364,28 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
  template<typename Dst>
  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
-    // TODO: use the following instead of calling call_assignment, same for the other methods
-    // dst = lazyprod(lhs,rhs);
-    call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op<Scalar>());
+    // Same as: dst.noalias() = lhs.lazyProduct(rhs);
+    // but easier on the compiler side
+    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<Scalar>());
  }
  
  template<typename Dst>
  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
-    // dst += lazyprod(lhs,rhs);
-    call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op<Scalar>());
+    // dst.noalias() += lhs.lazyProduct(rhs);
+    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<Scalar>());
  }
  
  template<typename Dst>
  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
-    // dst -= lazyprod(lhs,rhs);
-    call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op<Scalar>());
+    // dst.noalias() -= lhs.lazyProduct(rhs);
+    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<Scalar>());
  }
  
 //   template<typename Dst>
 //   static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
-//   { dst += alpha * lazyprod(lhs,rhs); }
+//   { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
 };

 // This specialization enforces the use of a coefficient-based evaluation strategy
@@ -371,7 +406,7 @@ template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typen
 struct etor_product_packet_impl;

 template<typename Lhs, typename Rhs, int ProductTag>
-struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > 
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
    : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
 {
  typedef Product<Lhs, Rhs, LazyProduct> XprType;
@@ -387,7 +422,11 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
      m_rhsImpl(m_rhs),     //       Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
                            //       or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
      m_innerDim(xpr.lhs().cols())
-  { }
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }

  // Everything below here is taken from CoeffBasedProduct.h

@@ -408,15 +447,15 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
      
    PacketSize = packet_traits<Scalar>::size,
-    
+
    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
    CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
-                  : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
+                  : InnerSize == Dynamic ? HugeCost
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

-    Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
+    Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
    
    LhsFlags = LhsEtorType::Flags,
    RhsFlags = RhsEtorType::Flags,
@@ -424,19 +463,16 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
    LhsAlignment = LhsEtorType::Alignment,
    RhsAlignment = RhsEtorType::Alignment,
    
-    LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
-    RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
-    
    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,
      
    SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,

    CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
-                    && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ),
+                    && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ),

    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
-                    && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ),
+                    && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ),

    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                    : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -445,12 +481,16 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          // TODO enable vectorization for mixed types
-          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
+          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
+          | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
          
-    Alignment = CanVectorizeLhs ? LhsAlignment
-              : CanVectorizeRhs ? RhsAlignment
+    LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
+    RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
+
+    Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
+              : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
              : 0,
-          
+
    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
    * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
    * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
@@ -460,13 +500,11 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
                        && LhsRowMajor
                        && (!RhsRowMajor)
                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
-                        && (LhsIsAligned && RhsIsAligned)
                        && (InnerSize % packet_traits<Scalar>::size == 0)
  };
  
-  EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
  {
-    // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

@@ -478,7 +516,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
  {
    const Index row = RowsAtCompileTime == 1 ? 0 : index;
    const Index col = RowsAtCompileTime == 1 ? index : 0;
-    // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

@@ -486,14 +523,21 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
  const PacketType packet(Index row, Index col) const
  {
    PacketType res;
-    typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
-                                     Unroll ? InnerSize : Dynamic,
+    typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
+                                     Unroll ? int(InnerSize) : Dynamic,
                                     LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
-
    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
    return res;
  }

+  template<int LoadMode, typename PacketType>
+  const PacketType packet(Index index) const
+  {
+    const Index row = RowsAtCompileTime == 1 ? 0 : index;
+    const Index col = RowsAtCompileTime == 1 ? index : 0;
+    return packet<LoadMode,PacketType>(row,col);
+  }
+
 protected:
  const LhsNested m_lhs;
  const RhsNested m_rhs;
@@ -506,12 +550,12 @@ protected:
 };

 template<typename Lhs, typename Rhs>
-struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar > 
-  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar >
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
+  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
 {
  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
-  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > Base;
+  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
  enum {
    Flags = Base::Flags | EvalBeforeNestingBit
  };
@@ -703,6 +747,8 @@ public:
  diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
    : m_diagImpl(diag), m_matImpl(mat)
  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
  
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
@@ -735,7 +781,7 @@ protected:

 // diagonal * dense
 template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
-struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> 
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
 {
  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
@@ -781,7 +827,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha

 // dense * diagonal
 template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
-struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> 
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
 {
  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
@@ -911,20 +957,20 @@ struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
 };

 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
-struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
+struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
 {
  template<typename Dest>
-  static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
+  static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
  {
    permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
  }
 };

 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
-struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, PermutationShape, ProductTag>
+struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
 {
  template<typename Dest>
-  static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
+  static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
  {
    permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
  }
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -50,20 +50,14 @@ public:

 public:
  enum {
-    Cost = (  Derived::SizeAtCompileTime == Dynamic
-           || Derived::CoeffReadCost == Dynamic
-           || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
-           ) ? Dynamic
-           : Derived::SizeAtCompileTime * Derived::CoeffReadCost
-               + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
+    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
+         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

 public:
  enum {
-    Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
-              ? CompleteUnrolling
-              : NoUnrolling
+    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };
  
 #ifdef EIGEN_DEBUG_ASSIGN
@@ -269,8 +263,9 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
  }
 };

-template<typename Func, typename Derived>
-struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
+// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
+template<typename Func, typename Derived, int Unrolling>
+struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
 {
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketType;
@@ -414,17 +409,7 @@ typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::redux(const Func& func) const
 {
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
-  
-  // FIXME, eval_nest should be handled by redux_evaluator, however:
-  //  - it is currently difficult to provide the right Flags since they are still handled by the expressions
-  //  - handling it here might reduce the number of template instantiations
-//   typedef typename internal::nested_eval<Derived,1>::type ThisNested;
-//   typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned;
-//   typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator;
-//   
-//   ThisNested thisNested(derived());
-//   ThisEvaluator thisEval(thisNested);
-  
+
  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());
  
--- a/Eigen/src/Core/Solve.h
+++ b/Eigen/src/Core/Solve.h
@@ -34,12 +34,11 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s
 template<typename Decomposition, typename RhsType>
 struct solve_traits<Decomposition,RhsType,Dense>
 {
-  typedef typename Decomposition::MatrixType MatrixType;
  typedef Matrix<typename RhsType::Scalar,
-                 MatrixType::ColsAtCompileTime,
+                 Decomposition::ColsAtCompileTime,
                 RhsType::ColsAtCompileTime,
                 RhsType::PlainObject::Options,
-                 MatrixType::MaxColsAtCompileTime,
+                 Decomposition::MaxColsAtCompileTime,
                 RhsType::MaxColsAtCompileTime> PlainObject;  
 };

@@ -52,7 +51,7 @@ struct traits<Solve<Decomposition, RhsType> >
  typedef traits<PlainObject> BaseTraits;
  enum {
    Flags = BaseTraits::Flags & RowMajorBit,
-    CoeffReadCost = Dynamic
+    CoeffReadCost = HugeCost
  };
 };

@@ -118,6 +117,8 @@ struct evaluator<Solve<Decomposition,RhsType> >
  typedef Solve<Decomposition,RhsType> SolveType;
  typedef typename SolveType::PlainObject PlainObject;
  typedef evaluator<PlainObject> Base;
+
+  enum { Flags = Base::Flags | EvalBeforeNestingBit };
  
  EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
    : m_result(solve.rows(), solve.cols())
@@ -143,6 +144,28 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
  }
 };

+// Specialization for "dst = dec.transpose().solve(rhs)"
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
+  }
+};
+
+// Specialization for "dst = dec.adjoint().solve(rhs)"
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
+  }
+};
+
 } // end namepsace internal

 } // end namespace Eigen
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -107,32 +107,32 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
 * meta-unrolling implementation
 ***************************************************************************/

-template<typename Lhs, typename Rhs, int Mode, int Index, int Size,
-         bool Stop = Index==Size>
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size,
+         bool Stop = LoopIndex==Size>
 struct triangular_solver_unroller;

-template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
-struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> {
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
+struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
  enum {
    IsLower = ((Mode&Lower)==Lower),
-    I = IsLower ? Index : Size - Index - 1,
-    S = IsLower ? 0     : I+1
+    DiagIndex  = IsLower ? LoopIndex : Size - LoopIndex - 1,
+    StartIndex = IsLower ? 0         : DiagIndex+1
  };
  static void run(const Lhs& lhs, Rhs& rhs)
  {
-    if (Index>0)
-      rhs.coeffRef(I) -= lhs.row(I).template segment<Index>(S).transpose()
-                         .cwiseProduct(rhs.template segment<Index>(S)).sum();
+    if (LoopIndex>0)
+      rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
+                                .cwiseProduct(rhs.template segment<LoopIndex>(StartIndex)).sum();

    if(!(Mode & UnitDiag))
-      rhs.coeffRef(I) /= lhs.coeff(I,I);
+      rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex);

-    triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs);
+    triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex+1,Size>::run(lhs,rhs);
  }
 };

-template<typename Lhs, typename Rhs, int Mode, int Index, int Size>
-struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> {
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
+struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
  static void run(const Lhs&, Rhs&) {}
 };

@@ -161,13 +161,6 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
 * TriangularView methods
 ***************************************************************************/

-/** "in-place" version of TriangularView::solve() where the result is written in \a other
-  *
-  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
-  * This function will const_cast it, so constness isn't honored here.
-  *
-  * See TriangularView:solve() for the details.
-  */
 template<typename MatrixType, unsigned int Mode>
 template<int Side, typename OtherDerived>
 void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
@@ -188,27 +181,6 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
    other = otherCopy;
 }

-/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
-  *
-  * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
-  * \a Side==OnTheLeft (the default), or the right-inverse-multiply  \a other * inverse(\c *this) if
-  * \a Side==OnTheRight.
-  *
-  * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
-  * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
-  * is an upper (resp. lower) triangular matrix.
-  *
-  * Example: \include Triangular_solve.cpp
-  * Output: \verbinclude Triangular_solve.out
-  *
-  * This function returns an expression of the inverse-multiply and can works in-place if it is assigned
-  * to the same matrix or vector \a other.
-  *
-  * For users coming from BLAS, this function (and more specifically solveInPlace()) offer
-  * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
-  *
-  * \sa TriangularView::solveInPlace()
-  */
 template<typename Derived, unsigned int Mode>
 template<int Side, typename Other>
 const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
--- a/Eigen/src/Core/SolverBase.h
+++ b/Eigen/src/Core/SolverBase.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVERBASE_H
+#define EIGEN_SOLVERBASE_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+
+} // end namespace internal
+
+/** \class SolverBase
+  * \brief A base class for matrix decomposition and solvers
+  *
+  * \tparam Derived the actual type of the decomposition/solver.
+  *
+  * Any matrix decomposition inheriting this base class provide the following API:
+  *
+  * \code
+  * MatrixType A, b, x;
+  * DecompositionType dec(A);
+  * x = dec.solve(b);             // solve A   * x = b
+  * x = dec.transpose().solve(b); // solve A^T * x = b
+  * x = dec.adjoint().solve(b);   // solve A'  * x = b
+  * \endcode
+  *
+  * \warning Currently, any other usage of transpose() and adjoint() are not supported and will produce compilation errors.
+  *
+  * \sa class PartialPivLU, class FullPivLU
+  */
+template<typename Derived>
+class SolverBase : public EigenBase<Derived>
+{
+  public:
+
+    typedef EigenBase<Derived> Base;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef Scalar CoeffReturnType;
+
+    enum {
+      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+                                                          internal::traits<Derived>::ColsAtCompileTime>::ret),
+      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+                                                             internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+      IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
+                           || internal::traits<Derived>::MaxColsAtCompileTime == 1
+    };
+
+    /** Default constructor */
+    SolverBase()
+    {}
+
+    ~SolverBase()
+    {}
+
+    using Base::derived;
+
+    /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      */
+    template<typename Rhs>
+    inline const Solve<Derived, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+      return Solve<Derived, Rhs>(derived(), b.derived());
+    }
+
+    /** \internal the return type of transpose() */
+    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    /** \returns an expression of the transposed of the factored matrix.
+      *
+      * A typical usage is to solve for the transposed problem A^T x = b:
+      * \code x = dec.transpose().solve(b); \endcode
+      *
+      * \sa adjoint(), solve()
+      */
+    inline ConstTransposeReturnType transpose() const
+    {
+      return ConstTransposeReturnType(derived());
+    }
+
+    /** \internal the return type of adjoint() */
+    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
+                        ConstTransposeReturnType
+                     >::type AdjointReturnType;
+    /** \returns an expression of the adjoint of the factored matrix
+      *
+      * A typical usage is to solve for the adjoint problem A' x = b:
+      * \code x = dec.adjoint().solve(b); \endcode
+      *
+      * For real scalar types, this function is equivalent to transpose().
+      *
+      * \sa transpose(), solve()
+      */
+    inline AdjointReturnType adjoint() const
+    {
+      return AdjointReturnType(derived().transpose());
+    }
+
+  protected:
+};
+
+namespace internal {
+
+template<typename Derived>
+struct generic_xpr_base<Derived, MatrixXpr, SolverStorage>
+{
+  typedef SolverBase<Derived> type;
+
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVERBASE_H
--- a/Eigen/src/Core/SpecialFunctions.h
+++ b/Eigen/src/Core/SpecialFunctions.h
@@ -0,0 +1,160 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIAL_FUNCTIONS_H
+#define EIGEN_SPECIAL_FUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+/****************************************************************************
+ * Implementation of lgamma                                                 *
+ ****************************************************************************/
+
+template<typename Scalar>
+struct lgamma_impl
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
+  {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return Scalar(0);
+  }
+};
+
+template<typename Scalar>
+struct lgamma_retval
+{
+  typedef Scalar type;
+};
+
+#ifdef EIGEN_HAS_C99_MATH
+template<>
+struct lgamma_impl<float>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); }
+};
+
+template<>
+struct lgamma_impl<double>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); }
+};
+#endif
+
+/****************************************************************************
+ * Implementation of erf                                                    *
+ ****************************************************************************/
+
+template<typename Scalar>
+struct erf_impl
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
+  {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return Scalar(0);
+  }
+};
+
+template<typename Scalar>
+struct erf_retval
+{
+  typedef Scalar type;
+};
+
+#ifdef EIGEN_HAS_C99_MATH
+template<>
+struct erf_impl<float>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); }
+};
+
+template<>
+struct erf_impl<double>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); }
+};
+#endif  // EIGEN_HAS_C99_MATH
+
+/***************************************************************************
+* Implementation of erfc                                                   *
+****************************************************************************/
+
+template<typename Scalar>
+struct erfc_impl
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
+  {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return Scalar(0);
+  }
+};
+
+template<typename Scalar>
+struct erfc_retval
+{
+  typedef Scalar type;
+};
+
+#ifdef EIGEN_HAS_C99_MATH
+template<>
+struct erfc_impl<float>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
+};
+
+template<>
+struct erfc_impl<double>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
+};
+#endif  // EIGEN_HAS_C99_MATH
+
+}  // end namespace internal
+
+
+namespace numext {
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
+}
+
+}  // end namespace numext
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_SPECIAL_FUNCTIONS_H
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -39,7 +39,7 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType>
    MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
    MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
+    Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),
    Flags1 = Flags0 | FlagsLvalueBit,
    Flags = Flags1 ^ RowMajorBit,
    InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -222,18 +222,23 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    TriangularView& operator=(const TriangularView &other)
    { return Base::operator=(other); }

+    /** \copydoc EigenBase::rows() */
    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_matrix.rows(); }
+    /** \copydoc EigenBase::cols() */
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_matrix.cols(); }

+    /** \returns a const reference to the nested expression */
    EIGEN_DEVICE_FUNC
    const NestedExpression& nestedExpression() const { return m_matrix; }
+
+    /** \returns a reference to the nested expression */
    EIGEN_DEVICE_FUNC
    NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); }
    
-    /** \sa MatrixBase::conjugate() const */
    typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
+    /** \sa MatrixBase::conjugate() const */
    EIGEN_DEVICE_FUNC
    inline const ConjugateReturnType conjugate() const
    { return ConjugateReturnType(m_matrix.conjugate()); }
@@ -279,19 +284,28 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    using Base::solve;
  #endif

-    EIGEN_DEVICE_FUNC
-    const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
-    {
-      EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
-      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
-    }
+    /** \returns a selfadjoint view of the referenced triangular part which must be either \c #Upper or \c #Lower.
+      *
+      * This is a shortcut for \code this->nestedExpression().selfadjointView<(*this)::Mode>() \endcode
+      * \sa MatrixBase::selfadjointView() */
    EIGEN_DEVICE_FUNC
    SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
    {
-      EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
    }

+    /** This is the const version of selfadjointView() */
+    EIGEN_DEVICE_FUNC
+    const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
+    {
+      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
+      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+    }
+
+
+    /** \returns the determinant of the triangular matrix
+      * \sa MatrixBase::determinant() */
    EIGEN_DEVICE_FUNC
    Scalar determinant() const
    {
@@ -341,12 +355,16 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
      Flags = internal::traits<TriangularViewType>::Flags
    };

+    /** \returns the outer-stride of the underlying dense matrix
+      * \sa DenseCoeffsBase::outerStride() */
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+    /** \returns the inner-stride of the underlying dense matrix
+      * \sa DenseCoeffsBase::innerStride() */
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return derived().nestedExpression().innerStride(); }

-    /** \sa MatrixBase::operator+=() */    
+    /** \sa MatrixBase::operator+=() */
    template<typename Other>
    EIGEN_DEVICE_FUNC
    TriangularViewType&  operator+=(const DenseBase<Other>& other) {
@@ -364,7 +382,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
    /** \sa MatrixBase::operator*=() */
    EIGEN_DEVICE_FUNC
    TriangularViewType&  operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }
-    /** \sa MatrixBase::operator/=() */
+    /** \sa DenseBase::operator/=() */
    EIGEN_DEVICE_FUNC
    TriangularViewType&  operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; }

@@ -408,21 +426,26 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
    EIGEN_DEVICE_FUNC
    TriangularViewType& operator=(const TriangularBase<OtherDerived>& other);

+    /** Shortcut for\code *this = other.other.triangularView<(*this)::Mode>() \endcode */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);

+#ifndef EIGEN_PARSED_BY_DOXYGEN
    EIGEN_DEVICE_FUNC
    TriangularViewType& operator=(const TriangularViewImpl& other)
    { return *this = other.derived().nestedExpression(); }

+    /** \deprecated */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    void lazyAssign(const TriangularBase<OtherDerived>& other);

+    /** \deprecated */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
-    void lazyAssign(const MatrixBase<OtherDerived>& other);  
+    void lazyAssign(const MatrixBase<OtherDerived>& other);
+#endif

    /** Efficient triangular matrix times vector/matrix product */
    template<typename OtherDerived>
@@ -442,11 +465,39 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
      return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived());
    }

+    /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
+      *
+      * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
+      * \a Side==OnTheLeft (the default), or the right-inverse-multiply  \a other * inverse(\c *this) if
+      * \a Side==OnTheRight.
+      *
+      * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
+      * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
+      * is an upper (resp. lower) triangular matrix.
+      *
+      * Example: \include Triangular_solve.cpp
+      * Output: \verbinclude Triangular_solve.out
+      *
+      * This function returns an expression of the inverse-multiply and can works in-place if it is assigned
+      * to the same matrix or vector \a other.
+      *
+      * For users coming from BLAS, this function (and more specifically solveInPlace()) offer
+      * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
+      *
+      * \sa TriangularView::solveInPlace()
+      */
    template<int Side, typename Other>
    EIGEN_DEVICE_FUNC
    inline const internal::triangular_solve_retval<Side,TriangularViewType, Other>
    solve(const MatrixBase<Other>& other) const;

+    /** "in-place" version of TriangularView::solve() where the result is written in \a other
+      *
+      * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+      * This function will const_cast it, so constness isn't honored here.
+      *
+      * See TriangularView:solve() for the details.
+      */
    template<int Side, typename OtherDerived>
    EIGEN_DEVICE_FUNC
    void solveInPlace(const MatrixBase<OtherDerived>& other) const;
@@ -456,18 +507,26 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
    void solveInPlace(const MatrixBase<OtherDerived>& other) const
    { return solveInPlace<OnTheLeft>(other); }

+    /** Swaps the coefficients of the common triangular parts of two matrices */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+    void swap(TriangularBase<OtherDerived> &other)
+#else
    void swap(TriangularBase<OtherDerived> const & other)
+#endif
    {
+      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
    }

-    // TODO: this overload is ambiguous and it should be deprecated (Gael)
+    /** \deprecated
+      * Shortcut for \code (*this).swap(other.triangularView<(*this)::Mode>()) \endcode */
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    void swap(MatrixBase<OtherDerived> const & other)
    {
+      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
    }

@@ -503,7 +562,7 @@ template<typename MatrixType, unsigned int Mode>
 template<typename OtherDerived>
 void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
 {
-  internal::call_assignment(derived().noalias(), other.template triangularView<Mode>());
+  internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());
 }


@@ -523,7 +582,7 @@ template<typename OtherDerived>
 void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
 {
  eigen_assert(Mode == int(OtherDerived::Mode));
-  internal::call_assignment(derived().noalias(), other.derived());
+  internal::call_assignment_no_alias(derived(), other.derived());
 }

 /***************************************************************************
@@ -745,7 +804,7 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co
  
  enum {
      unroll = DstXprType::SizeAtCompileTime != Dynamic
-            && SrcEvaluatorType::CoeffReadCost != Dynamic
+            && SrcEvaluatorType::CoeffReadCost < HugeCost
            && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT
    };
  
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -11,7 +11,7 @@
 #ifndef EIGEN_PARTIAL_REDUX_H
 #define EIGEN_PARTIAL_REDUX_H

-namespace Eigen { 
+namespace Eigen {

 /** \class PartialReduxExpr
  * \ingroup Core_Module
@@ -41,8 +41,6 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
  typedef typename traits<MatrixType>::StorageKind StorageKind;
  typedef typename traits<MatrixType>::XprKind XprKind;
  typedef typename MatrixType::Scalar InputScalar;
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    RowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::RowsAtCompileTime,
    ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
@@ -62,8 +60,6 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri

    typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
-    typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
-    typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;

    EIGEN_DEVICE_FUNC
    explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
@@ -74,24 +70,14 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<Matri
    EIGEN_DEVICE_FUNC
    Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }

-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
-    {
-      if (Direction==Vertical)
-        return m_functor(m_matrix.col(j));
-      else
-        return m_functor(m_matrix.row(i));
-    }
+    EIGEN_DEVICE_FUNC
+    typename MatrixType::Nested nestedExpression() const { return m_matrix; }

-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
-    {
-      if (Direction==Vertical)
-        return m_functor(m_matrix.col(index));
-      else
-        return m_functor(m_matrix.row(index));
-    }
+    EIGEN_DEVICE_FUNC
+    const MemberOp& functor() const { return m_functor; }

  protected:
-    MatrixTypeNested m_matrix;
+    typename MatrixType::Nested m_matrix;
    const MemberOp m_functor;
 };

@@ -124,6 +110,16 @@ EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);

+template <int p, typename ResultType>
+struct member_lpnorm {
+  typedef ResultType result_type;
+  template<typename Scalar, int Size> struct Cost
+  { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
+  EIGEN_DEVICE_FUNC member_lpnorm() {}
+  template<typename XprType>
+  EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
+  { return mat.template lpNorm<p>(); }
+};

 template <typename BinaryOp, typename Scalar>
 struct member_redux {
@@ -230,7 +226,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                       isVertical   ? 1 : m_matrix.rows(),
                       isHorizontal ? 1 : m_matrix.cols());
    }
-    
+
    template<typename OtherDerived> struct OppositeExtendedType {
      typedef Replicate<OtherDerived,
                        isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
@@ -290,9 +286,13 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
    typedef Reverse<ExpressionType, Direction> ReverseReturnType;

+    template<int p> struct LpNormReturnType {
+      typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar>,Direction> Type;
+    };
+
    /** \returns a row (or column) vector expression of the smallest coefficient
      * of each column (or row) of the referenced expression.
-      * 
+      *
      * \warning the result is undefined if \c *this contains NaN.
      *
      * Example: \include PartialRedux_minCoeff.cpp
@@ -305,7 +305,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp

    /** \returns a row (or column) vector expression of the largest coefficient
      * of each column (or row) of the referenced expression.
-      * 
+      *
      * \warning the result is undefined if \c *this contains NaN.
      *
      * Example: \include PartialRedux_maxCoeff.cpp
@@ -340,10 +340,23 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    const NormReturnType norm() const
    { return NormReturnType(_expression()); }

+    /** \returns a row (or column) vector expression of the norm
+      * of each column (or row) of the referenced expression.
+      * This is a vector with real entries, even if the original matrix has complex entries.
+      *
+      * Example: \include PartialRedux_norm.cpp
+      * Output: \verbinclude PartialRedux_norm.out
+      *
+      * \sa DenseBase::norm() */
+    template<int p>
+    EIGEN_DEVICE_FUNC
+    const typename LpNormReturnType<p>::Type lpNorm() const
+    { return typename LpNormReturnType<p>::Type(_expression()); }
+

    /** \returns a row (or column) vector expression of the norm
      * of each column (or row) of the referenced expression, using
-      * Blue's algorithm. 
+      * Blue's algorithm.
      * This is a vector with real entries, even if the original matrix has complex entries.
      *
      * \sa DenseBase::blueNorm() */
@@ -408,7 +421,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
      * \sa DenseBase::any() */
    EIGEN_DEVICE_FUNC
    const AnyReturnType any() const
-    { return Any(_expression()); }
+    { return AnyReturnType(_expression()); }

    /** \returns a row (or column) vector expression representing
      * the number of \c true coefficients of each respective column (or row).
@@ -579,7 +592,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
      return m_matrix / extendedTo(other.derived());
    }
-    
+
    /** \returns an expression where each column of row of the referenced matrix are normalized.
      * The referenced matrix is \b not modified.
      * \sa MatrixBase::normalized(), normalize()
@@ -589,8 +602,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                  const ExpressionTypeNestedCleaned,
                  const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
    normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
-    
-    
+
+
    /** Normalize in-place each row or columns of the referenced matrix.
      * \sa MatrixBase::normalize(), normalized()
      */
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -109,14 +109,11 @@ void DenseBase<Derived>::visit(Visitor& visitor) const
  typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());
  
-  enum { unroll =   SizeAtCompileTime != Dynamic
-                &&  ThisEvaluator::CoeffReadCost != Dynamic
-                &&  (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
-                &&  SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
-                <= EIGEN_UNROLLING_LIMIT };
-  return internal::visitor_impl<Visitor, ThisEvaluator,
-      unroll ? int(SizeAtCompileTime) : Dynamic
-    >::run(thisEval, visitor);
+  enum {
+    unroll =  SizeAtCompileTime != Dynamic
+           && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT
+  };
+  return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
 }

 namespace internal {
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -38,10 +38,10 @@ psin<Packet8f>(const Packet8f& _x) {
  _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
  _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
  _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
-  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00);
-  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04);
-  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07);
-  _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
+  _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);

  // Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period.
  Packet8f z = pmul(x, p8f_one_over_pi);
@@ -55,14 +55,14 @@ psin<Packet8f>(const Packet8f& _x) {
  // is odd.
  Packet8i shift_ints = _mm256_cvtps_epi32(shift);
  Packet8i shift_isodd =
-      (__m256i)_mm256_and_ps((__m256)shift_ints, (__m256)p8i_one);
+      _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
 #ifdef EIGEN_VECTORIZE_AVX2
  Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31);
 #else
  __m128i lo =
-      _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 0), 31);
+      _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31);
  __m128i hi =
-      _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 1), 31);
+      _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31);
  Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi);
 #endif

@@ -72,9 +72,9 @@ psin<Packet8f>(const Packet8f& _x) {

  // Evaluate the polynomial for the interval [1,3] in z.
  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
  Packet8f z_minus_two = psub(z, p8f_two);
  Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
  Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
@@ -82,10 +82,10 @@ psin<Packet8f>(const Packet8f& _x) {
  right = pmadd(right, z_minus_two2, p8f_coeff_right_0);

  // Evaluate the polynomial for the interval [-1,1] in z.
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
  Packet8f z2 = pmul(z, z);
  Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
  left = pmadd(left, z2, p8f_coeff_left_3);
@@ -98,7 +98,7 @@ psin<Packet8f>(const Packet8f& _x) {
  Packet8f res = _mm256_or_ps(left, right);

  // Flip the sign on the odd intervals and return the result.
-  res = _mm256_xor_ps(res, (__m256)sign_flip_mask);
+  res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
  return res;
 }

@@ -145,10 +145,10 @@ plog<Packet8f>(const Packet8f& _x) {
 // Extract the shifted exponents (No bitwise shifting in regular AVX, so
 // convert to SSE and do it there).
 #ifdef EIGEN_VECTORIZE_AVX2
-  Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32((__m256i)x, 23));
+  Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23));
 #else
-  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 0), 23);
-  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 1), 23);
+  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23);
+  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23);
  Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi));
 #endif
  Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
@@ -348,7 +348,7 @@ pexp<Packet4d>(const Packet4d& _x) {

  // Construct the result 2^n * exp(g) = e * x. The max is used to catch
  // non-finite values in the input.
-  return pmax(pmul(x, Packet4d(e)), _x);
+  return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
 }

 // Functions for sqrt.
@@ -393,7 +393,7 @@ Packet4d psqrt<Packet4d>(const Packet4d& x) {

 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
- _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
  _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
  _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -43,7 +43,7 @@ template<> struct is_arithmetic<__m256d> { enum { value = true }; };
  const Packet4d p4d_##NAME = pset1<Packet4d>(X)

 #define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
-  const Packet8f p8f_##NAME = (__m256)pset1<Packet8i>(X)
+  const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))

 #define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
  const Packet8i p8i_##NAME = pset1<Packet8i>(X)
@@ -66,7 +66,10 @@ template<> struct packet_traits<float>  : default_packet_traits
    HasExp  = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
-    HasBlend = 1
+    HasBlend = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
  };
 };
 template<> struct packet_traits<double> : default_packet_traits
@@ -83,7 +86,10 @@ template<> struct packet_traits<double> : default_packet_traits
    HasExp  = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
-    HasBlend = 1
+    HasBlend = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
  };
 };

@@ -176,6 +182,15 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const
 template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }

+template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+
 template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }

--- a/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -66,6 +66,43 @@ double2 prsqrt<double2>(const double2& a)
  return make_double2(rsqrt(a.x), rsqrt(a.y));
 }

+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plgamma<float4>(const float4& a)
+{
+  return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plgamma<double2>(const double2& a)
+{
+  return make_double2(lgamma(a.x), lgamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perf<float4>(const float4& a)
+{
+  return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perf<double2>(const double2& a)
+{
+  return make_double2(erf(a.x), erf(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perfc<float4>(const float4& a)
+{
+  return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perfc<double2>(const double2& a)
+{
+  return make_double2(erfc(a.x), erfc(a.y));
+}
+
+
 #endif

 } // end namespace internal
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -39,6 +39,9 @@ template<> struct packet_traits<float> : default_packet_traits
    HasExp  = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
+    HasLGamma = 1,
+    HasErf = 1,
+    HasErfc = 1,

    HasBlend = 0,
  };
@@ -59,6 +62,9 @@ template<> struct packet_traits<double> : default_packet_traits
    HasExp  = 1,
    HasSqrt = 1,
    HasRsqrt = 1,
+    HasLGamma = 1,
+    HasErf = 1,
+    HasErfc = 1,

    HasBlend = 0,
  };
@@ -177,7 +183,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
  to[1] = from.y;
 }

-#ifdef __CUDA_ARCH__
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
 template<>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
  return __ldg((const float4*)from);
@@ -244,6 +250,13 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
  return fmin(a.x, a.y);
 }

+template<> EIGEN_DEVICE_FUNC inline float  predux_mul<float4>(const float4& a) {
+  return a.x * a.y * a.z * a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
+  return a.x * a.y;
+}
+
 template<> EIGEN_DEVICE_FUNC inline float4  pabs<float4>(const float4& a) {
  return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
 }
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -73,7 +73,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con

  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
-  // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
+  // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
  // Multiply the real a with b
  v1 = vmulq_f32(v1, b.v);
@@ -325,8 +325,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con

  // Get the real values of a 
  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
-  // Get the real values of a 
-  v2 = vdupq_lane_f64(vget_high_f64(a.v), 1);
+  // Get the imag values of a
+  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
  // Multiply the real a with b
  v1 = vmulq_f64(v1, b.v);
  // Multiply the imag a with b
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -67,7 +67,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)

 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
-  // TODO optimize it for SSE3 and 4
  #ifdef EIGEN_VECTORIZE_SSE3
  return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
                                 _mm_mul_ps(_mm_movehdup_ps(a.v),
@@ -310,9 +309,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)

 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
-  // TODO optimize it for SSE3 and 4
  #ifdef EIGEN_VECTORIZE_SSE3
-  return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+  return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
                                 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
                                            vec2d_swizzle1(b.v, 1, 0))));
  #else
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -110,6 +110,13 @@ template<> struct packet_traits<float>  : default_packet_traits
    HasSqrt = 1,
    HasRsqrt = 1,
    HasBlend = 1
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
  };
 };
 template<> struct packet_traits<double> : default_packet_traits
@@ -127,6 +134,13 @@ template<> struct packet_traits<double> : default_packet_traits
    HasSqrt = 1,
    HasRsqrt = 1,
    HasBlend = 1
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
  };
 };
 #endif
@@ -135,7 +149,6 @@ template<> struct packet_traits<int>    : default_packet_traits
  typedef Packet4i type;
  typedef Packet4i half;
  enum {
-    // FIXME check the Has*
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=4,
@@ -223,10 +236,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const

 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
-template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
-{ eigen_assert(false && "packet integer division are not supported by SSE");
-  return pset1<Packet4i>(0);
-}

 // for some weird raisons, it has to be overloaded for packet of integers
 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
@@ -261,6 +270,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const
 #endif
 }

+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+
 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
@@ -287,8 +307,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from) { E
    #if (EIGEN_COMP_MSVC==1600)
    // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps
    // (i.e., it does not generate an unaligned load!!
-    // TODO On most architectures this version should also be faster than a single _mm_loadu_ps
-    // so we could also enable it for MSVC08 but first we have to make this later does not generate crap when doing so...
    __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
    res = _mm_loadh_pi(res, (const __m64*)(from+2));
    return res;
@@ -299,24 +317,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from) { E
  template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
  template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int*    from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
 #else
-// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
-// require pointer casting to incompatible pointer types and leads to invalid code
-// because of the strict aliasing rule. The "dummy" stuff are required to enforce
-// a correct instruction dependency.
-// TODO: do the same for MSVC (ICC is compatible)
 // NOTE: with the code below, MSVC's compiler crashes!

 #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8)))
  // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
  #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
-  #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1
 #elif EIGEN_COMP_CLANG
  // bug 201: Segfaults in __mm_loadh_pd with clang 2.8
  #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
-  #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
 #else
  #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
-  #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
 #endif

 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
@@ -374,17 +384,9 @@ template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& f
 template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
 template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }

-template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
-  EIGEN_DEBUG_UNALIGNED_STORE
-#if EIGEN_AVOID_CUSTOM_UNALIGNED_STORES
-  _mm_storeu_pd(to, from);
-#else
-  _mm_storel_pd((to), from);
-  _mm_storeh_pd((to+1), from);
-#endif
-}
-template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castps_pd(from))); }
-template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*      to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), Packet2d(_mm_castsi128_pd(from))); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }

 template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
 {
@@ -547,7 +549,6 @@ EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
 }

 #ifdef EIGEN_VECTORIZE_SSE3
-// TODO implement SSE2 versions as well as integer versions
 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
 {
  return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
@@ -556,11 +557,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
 {
  return _mm_hadd_pd(vecs[0], vecs[1]);
 }
-// SSSE3 version:
-// EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs)
-// {
-//   return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
-// }

 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
 {
@@ -569,23 +565,16 @@ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
 }

 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return pfirst<Packet2d>(_mm_hadd_pd(a, a)); }
-
-// SSSE3 version:
-// EIGEN_STRONG_INLINE float predux(const Packet4i& a)
-// {
-//   Packet4i tmp0 = _mm_hadd_epi32(a,a);
-//   return pfirst(_mm_hadd_epi32(tmp0, tmp0));
-// }
 #else
 // SSE2 versions
 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
 {
  Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
-  return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+  return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
 }
 template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
 {
-  return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+  return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
 }

 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
@@ -608,6 +597,18 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
 }
 #endif  // SSE3

+
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i tmp0 = _mm_hadd_epi32(a,a);
+  return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
 {
  Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
@@ -627,7 +628,7 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
  tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
  return _mm_add_epi32(tmp0, tmp2);
 }
-
+#endif
 // Other reduction functions:

 // mul
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -26,10 +26,10 @@ template<typename Scalar> struct scalar_sum_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::padd(a,b); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
  { return internal::predux(a); }
 };
 template<typename Scalar>
@@ -65,10 +65,10 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pmul(a,b); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
  { return internal::predux_mul(a); }
 };
 template<typename LhsScalar,typename RhsScalar>
@@ -97,7 +97,7 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
  { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
  
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
 };
 template<typename LhsScalar,typename RhsScalar>
@@ -117,10 +117,10 @@ template<typename Scalar> struct scalar_min_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pmin(a,b); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
  { return internal::predux_min(a); }
 };
 template<typename Scalar>
@@ -140,10 +140,10 @@ template<typename Scalar> struct scalar_max_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pmax(a,b); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
  { return internal::predux_max(a); }
 };
 template<typename Scalar>
@@ -175,22 +175,37 @@ struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> {


 template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> {
+  typedef bool result_type;
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;}
 };
 template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> {
+  typedef bool result_type;
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;}
 };
 template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> {
+  typedef bool result_type;
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;}
 };
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GT> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GE> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;}
+};
 template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> {
+  typedef bool result_type;
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);}
 };
 template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
+  typedef bool result_type;
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;}
 };
@@ -252,7 +267,7 @@ template<typename Scalar> struct scalar_difference_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::psub(a,b); }
 };
 template<typename Scalar>
@@ -277,7 +292,7 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pdiv(a,b); }
 };
 template<typename LhsScalar,typename RhsScalar>
@@ -349,7 +364,7 @@ struct scalar_multiple_op {
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
  template <typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pmul(a, pset1<Packet>(m_other)); }
  typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
 };
@@ -384,7 +399,7 @@ struct scalar_quotient1_op {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
  template <typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pdiv(a, pset1<Packet>(m_other)); }
  typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
 };
@@ -426,7 +441,7 @@ struct scalar_add_op {
  EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
  template <typename Packet>
-  inline const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::padd(a, pset1<Packet>(m_other)); }
  const Scalar m_other;
 };
@@ -440,11 +455,11 @@ struct functor_traits<scalar_add_op<Scalar> >
  */
 template<typename Scalar>
 struct scalar_sub_op {
-  inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
-  inline scalar_sub_op(const Scalar& other) : m_other(other) { }
-  inline Scalar operator() (const Scalar& a) const { return a - m_other; }
+  EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; }
  template <typename Packet>
-  inline const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
  { return internal::psub(a, pset1<Packet>(m_other)); }
  const Scalar m_other;
 };
@@ -458,11 +473,11 @@ struct functor_traits<scalar_sub_op<Scalar> >
  */
 template<typename Scalar>
 struct scalar_rsub_op {
-  inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
-  inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
-  inline Scalar operator() (const Scalar& a) const { return m_other - a; }
+  EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; }
  template <typename Packet>
-  inline const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
  { return internal::psub(pset1<Packet>(m_other), a); }
  const Scalar m_other;
 };
@@ -477,8 +492,8 @@ struct functor_traits<scalar_rsub_op<Scalar> >
 template<typename Scalar>
 struct scalar_pow_op {
  // FIXME default copy constructors seems bugged with std::complex<>
-  inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
-  inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
+  EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
+  EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
  EIGEN_DEVICE_FUNC
  inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
  const Scalar m_exponent;
@@ -493,10 +508,10 @@ struct functor_traits<scalar_pow_op<Scalar> >
  */
 template<typename Scalar>
 struct scalar_inverse_mult_op {
-  scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
+  EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
  { return internal::pdiv(pset1<Packet>(m_other),a); }
  Scalar m_other;
 };
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -21,12 +21,11 @@ struct scalar_constant_op {
  template<typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
  template<typename Index, typename PacketType>
-  EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1<PacketType>(m_other); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1<PacketType>(m_other); }
  const Scalar m_other;
 };
 template<typename Scalar>
 struct functor_traits<scalar_constant_op<Scalar> >
-// FIXME replace this packet test by a safe one
 { enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };

 template<typename Scalar> struct scalar_identity_op {
@@ -64,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,false>
  }

  template<typename Index>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }

  const Scalar m_low;
  const Scalar m_step;
@@ -86,7 +85,7 @@ struct linspaced_op_impl<Scalar,Packet,true>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }

  template<typename Index>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
  { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }

  const Scalar m_low;
@@ -121,12 +120,12 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
  }

  template<typename Index, typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }

  // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
  // there row==0 and col is used for the actual iteration.
  template<typename Index, typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
  {
    eigen_assert(col==0 || row==0);
    return impl.packetOp(col + row);
@@ -135,14 +134,12 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
  // This proxy object handles the actual required temporaries, the different
  // implementations (random vs. sequential access) as well as the
  // correct piping to size 2/4 packet operations.
-  // TODO find a way to make the packet type configurable
  const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl;
 };

 // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
 // to indicate whether a functor allows linear access, just always answering 'yes' except for
 // scalar_identity_op.
-// FIXME move this to functor_traits adding a functor_default
 template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
 template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };

--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -23,7 +23,7 @@ template<typename Scalar> struct scalar_opposite_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pnegate(a); }
 };
 template<typename Scalar>
@@ -43,7 +43,7 @@ template<typename Scalar> struct scalar_abs_op {
  typedef typename NumTraits<Scalar>::Real result_type;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pabs(a); }
 };
 template<typename Scalar>
@@ -94,7 +94,7 @@ template<typename Scalar> struct scalar_abs2_op {
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::pmul(a,a); }
 };
 template<typename Scalar>
@@ -111,7 +111,7 @@ template<typename Scalar> struct scalar_conjugate_op {
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_conjugate_op<Scalar> >
@@ -132,7 +132,7 @@ template<typename Scalar> struct scalar_arg_op {
  typedef typename NumTraits<Scalar>::Real result_type;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
  { return internal::parg(a); }
 };
 template<typename Scalar>
@@ -232,7 +232,7 @@ template<typename Scalar> struct scalar_exp_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_exp_op<Scalar> >
@@ -248,7 +248,7 @@ template<typename Scalar> struct scalar_log_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_log_op<Scalar> >
@@ -264,7 +264,7 @@ template<typename Scalar> struct scalar_log10_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_log10_op<Scalar> >
@@ -278,7 +278,7 @@ template<typename Scalar> struct scalar_sqrt_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_sqrt_op<Scalar> >
@@ -296,7 +296,7 @@ template<typename Scalar> struct scalar_rsqrt_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
 };

 template<typename Scalar>
@@ -315,7 +315,7 @@ template<typename Scalar> struct scalar_cos_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_cos_op<Scalar> >
@@ -334,7 +334,7 @@ template<typename Scalar> struct scalar_sin_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_sin_op<Scalar> >
@@ -354,7 +354,7 @@ template<typename Scalar> struct scalar_tan_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_tan_op<Scalar> >
@@ -373,7 +373,7 @@ template<typename Scalar> struct scalar_acos_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_acos_op<Scalar> >
@@ -392,7 +392,7 @@ template<typename Scalar> struct scalar_asin_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_asin_op<Scalar> >
@@ -403,15 +403,86 @@ struct functor_traits<scalar_asin_op<Scalar> >
  };
 };

+
+/** \internal
+ * \brief Template functor to compute the natural log of the absolute
+ * value of Gamma of a scalar
+ * \sa class CwiseUnaryOp, Cwise::lgamma()
+ */
+template<typename Scalar> struct scalar_lgamma_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+    using numext::lgamma; return lgamma(a);
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_lgamma_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasLGamma
+  };
+};
+
+/** \internal
+ * \brief Template functor to compute the Gauss error function of a
+ * scalar
+ * \sa class CwiseUnaryOp, Cwise::erf()
+ */
+template<typename Scalar> struct scalar_erf_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+    using numext::erf; return erf(a);
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_erf_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasErf
+  };
+};
+
+/** \internal
+ * \brief Template functor to compute the Complementary Error Function
+ * of a scalar
+ * \sa class CwiseUnaryOp, Cwise::erfc()
+ */
+template<typename Scalar> struct scalar_erfc_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+    using numext::erfc; return erfc(a);
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_erfc_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasErfc
+  };
+};
+
+
 /** \internal
  * \brief Template functor to compute the atan of a scalar
  * \sa class CwiseUnaryOp, ArrayBase::atan()
  */
 template<typename Scalar> struct scalar_atan_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_atan_op<Scalar> >
@@ -422,15 +493,16 @@ struct functor_traits<scalar_atan_op<Scalar> >
  };
 };

+
 /** \internal
  * \brief Template functor to compute the tanh of a scalar
  * \sa class CwiseUnaryOp, ArrayBase::tanh()
  */
 template<typename Scalar> struct scalar_tanh_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_tanh_op<Scalar> >
@@ -447,9 +519,9 @@ struct functor_traits<scalar_tanh_op<Scalar> >
  */
 template<typename Scalar> struct scalar_sinh_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_sinh_op<Scalar> >
@@ -466,9 +538,9 @@ struct functor_traits<scalar_sinh_op<Scalar> >
  */
 template<typename Scalar> struct scalar_cosh_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_cosh_op<Scalar> >
@@ -488,7 +560,7 @@ struct scalar_inverse_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
  { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
 };
 template<typename Scalar>
@@ -504,7 +576,7 @@ struct scalar_square_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
  { return internal::pmul(a,a); }
 };
 template<typename Scalar>
@@ -520,7 +592,7 @@ struct scalar_cube_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
  { return internal::pmul(a,pmul(a,a)); }
 };
 template<typename Scalar>
@@ -535,7 +607,7 @@ template<typename Scalar> struct scalar_round_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_round_op<Scalar> >
@@ -554,7 +626,7 @@ template<typename Scalar> struct scalar_floor_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }
  template <typename Packet>
-  inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_floor_op<Scalar> >
@@ -573,7 +645,7 @@ template<typename Scalar> struct scalar_ceil_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_ceil_op<Scalar> >
@@ -655,6 +727,49 @@ struct functor_traits<scalar_boolean_not_op<Scalar> > {
  };
 };

+/** \internal
+  * \brief Template functor to compute the signum of a scalar
+  * \sa class CwiseUnaryOp, Cwise::sign()
+  */
+template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op;
+template<typename Scalar> 
+struct scalar_sign_op<Scalar,false> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const 
+  {
+      return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
+  }
+  //TODO
+  //template <typename Packet>
+  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
+};
+template<typename Scalar> 
+struct scalar_sign_op<Scalar,true> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const 
+  {
+    using std::abs;
+    typedef typename NumTraits<Scalar>::Real real_type;
+    real_type aa = abs(a);
+    if (aa==0)
+      return Scalar(0); 
+    aa = 1./aa; 
+    return Scalar(real(a)*aa, imag(a)*aa );
+  }
+  //TODO
+  //template <typename Packet>
+  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sign_op<Scalar> >
+{ enum {
+    Cost = 
+        NumTraits<Scalar>::IsComplex
+        ? ( 8*NumTraits<Scalar>::MulCost  ) // roughly
+        : ( 3*NumTraits<Scalar>::AddCost),
+    PacketAccess = packet_traits<Scalar>::HasSign
+  };
+};

 } // end namespace internal

--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -36,37 +36,40 @@ const std::ptrdiff_t defaultL3CacheSize = 512*1024;
 #endif

 /** \internal */
-inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
-{
-  static bool m_cache_sizes_initialized = false;
-  static std::ptrdiff_t m_l1CacheSize = 0;
-  static std::ptrdiff_t m_l2CacheSize = 0;
-  static std::ptrdiff_t m_l3CacheSize = 0;
-
-  if(!m_cache_sizes_initialized)
-  {
+struct CacheSizes { 
+  CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) {
    int l1CacheSize, l2CacheSize, l3CacheSize;
    queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
-    m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
-    m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
-    m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
-    m_cache_sizes_initialized = true;
+    m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
+    m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
+    m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
  }

+  std::ptrdiff_t m_l1;
+  std::ptrdiff_t m_l2;
+  std::ptrdiff_t m_l3;
+};
+
+
+/** \internal */
+inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
+{
+  static CacheSizes m_cacheSizes;
+
  if(action==SetAction)
  {
    // set the cpu cache size and cache all block sizes from a global cache size in byte
    eigen_internal_assert(l1!=0 && l2!=0);
-    m_l1CacheSize = *l1;
-    m_l2CacheSize = *l2;
-    m_l3CacheSize = *l3;
+    m_cacheSizes.m_l1 = *l1;
+    m_cacheSizes.m_l2 = *l2;
+    m_cacheSizes.m_l3 = *l3;
  }
  else if(action==GetAction)
  {
    eigen_internal_assert(l1!=0 && l2!=0);
-    *l1 = m_l1CacheSize;
-    *l2 = m_l2CacheSize;
-    *l3 = m_l3CacheSize;
+    *l1 = m_cacheSizes.m_l1;
+    *l2 = m_cacheSizes.m_l2;
+    *l3 = m_cacheSizes.m_l3;
  }
  else
  {
@@ -200,8 +203,6 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
    const Index actual_l2 = 1572864; // == 1.5 MB
    #endif
    
-    
-    
    // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
    // The second half is implicitly reserved to access the result and lhs coefficients.
    // When k<max_kc, then nc can arbitrarily growth. In practice, it seems to be fruitful
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -149,7 +149,7 @@ static void run(Index rows, Index cols, Index depth,
      {
      for(Index i=0; i<threads; ++i)
        #pragma omp atomic
-        --(info[i].users);
+        info[i].users -= 1;
      }
    }
  }
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -102,21 +102,17 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
  // - we are not already in a parallel code
  // - the sizes are large enough

-  // 1- are we already in a parallel session?
-  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
-  if((!Condition) || (omp_get_num_threads()>1))
-    return func(0,rows, 0,cols);
-
-  Index size = transpose ? rows : cols;
-
-  // 2- compute the maximal number of threads from the size of the product:
+  // compute the maximal number of threads from the size of the product:
  // FIXME this has to be fine tuned
-  Index max_threads = std::max<Index>(1,size / 32);
+  Index size = transpose ? rows : cols;
+  Index pb_max_threads = std::max<Index>(1,size / 32);
+  // compute the number of threads we are going to use
+  Index threads = std::min<Index>(nbThreads(), pb_max_threads);

-  // 3 - compute the number of threads we are going to use
-  Index threads = std::min<Index>(nbThreads(), max_threads);
-
-  if(threads==1)
+  // if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
+  // then abort multi-threading
+  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
+  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
    return func(0,rows, 0,cols);

  Eigen::initParallel();
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -30,7 +30,7 @@ struct selfadjoint_matrix_vector_product
 static EIGEN_DONT_INLINE void run(
  Index size,
  const Scalar*  lhs, Index lhsStride,
-  const Scalar* _rhs, Index rhsIncr,
+  const Scalar*  rhs,
  Scalar* res,
  Scalar alpha);
 };
@@ -39,11 +39,12 @@ template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool Conju
 EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
  Index size,
  const Scalar*  lhs, Index lhsStride,
-  const Scalar* _rhs, Index rhsIncr,
+  const Scalar*  rhs,
  Scalar* res,
  Scalar alpha)
 {
  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
  const Index PacketSize = sizeof(Packet)/sizeof(Scalar);

  enum {
@@ -54,23 +55,13 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd

  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> cj0;
  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
-  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
+  conj_helper<RealScalar,Scalar,false, ConjugateRhs> cjd;

  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> pcj0;
  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;

  Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;

-  // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
-  // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
-  // this is because we need to extract packets
-  ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);  
-  if (rhsIncr!=1)
-  {
-    const Scalar* it = _rhs;
-    for (Index i=0; i<size; ++i, it+=rhsIncr)
-      rhs[i] = *it;
-  }

  Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
  if (FirstTriangular)
@@ -97,7 +88,6 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
    size_t alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
    size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);

-    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
    res[j]   += cjd.pmul(numext::real(A0[j]), t0);
    res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
    if(FirstTriangular)
@@ -151,7 +141,6 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd

    Scalar t1 = cjAlpha * rhs[j];
    Scalar t2(0);
-    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
    res[j] += cjd.pmul(numext::real(A0[j]), t1);
    for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
    {
@@ -238,7 +227,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
      (
        lhs.rows(),                             // size
        &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
-        actualRhsPtr, 1,                        // rhs info
+        actualRhsPtr,                           // rhs info
        actualDestPtr,                          // result info
        actualAlpha                             // scale factor
      );
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -52,16 +52,16 @@ template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool Con
 struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
 static void run( \
  Index size, const Scalar*  lhs, Index lhsStride, \
-  const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
+  const Scalar* _rhs, Scalar* res, Scalar alpha) { \
    enum {\
      IsColMajor = StorageOrder==ColMajor \
    }; \
    if (IsColMajor == ConjugateLhs) {\
      selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
-        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
+        size, lhs, lhsStride, _rhs, res, alpha);  \
    } else {\
      selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
-        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
+        size, lhs, lhsStride, _rhs, res, alpha);  \
    }\
  } \
 }; \
@@ -79,13 +79,13 @@ typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
 \
 static void run( \
 Index size, const EIGTYPE*  lhs, Index lhsStride, \
-const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
+const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
 { \
  enum {\
    IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
    IsLower = UpLo == Lower ? 1 : 0 \
  }; \
-  MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
+  MKL_INT n=size, lda=lhsStride, incx=1, incy=1; \
  MKLTYPE alpha_, beta_; \
  const EIGTYPE *x_ptr, myone(1); \
  char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
@@ -93,10 +93,9 @@ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
  assign_scalar_eig2mkl(beta_, myone); \
  SYMVVector x_tmp; \
  if (ConjugateRhs) { \
-    Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
+    Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
    x_tmp=map_x.conjugate(); \
    x_ptr=x_tmp.data(); \
-    incx=1; \
  } else x_ptr=_rhs; \
  MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
 }\
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -304,9 +304,12 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
                for (Index i=0; i<actual_mc; ++i)
                  r[i] -= a[i] * b;
              }
-              Scalar b = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(rhs(j,j));
-              for (Index i=0; i<actual_mc; ++i)
-                r[i] *= b;
+              if((Mode & UnitDiag)==0)
+              {
+                Scalar b = conj(rhs(j,j));
+                for (Index i=0; i<actual_mc; ++i)
+                  r[i] /= b;
+              }
            }

            // pack the just computed part of lhs to A
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -30,6 +30,14 @@ const int DynamicIndex = 0xffffff;
  */
 const int Infinity = -1;

+/** This value means that the cost to evaluate an expression coefficient is either very expensive or
+  * cannot be known at compile time.
+  *
+  * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions.
+  * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow.
+  */
+const int HugeCost = 10000;
+
 /** \defgroup flags Flags
  * \ingroup Core_Module
  *
@@ -189,8 +197,8 @@ const unsigned int HereditaryBits = RowMajorBit
  */

 /** \ingroup enums
-  * Enum containing possible values for the \p Mode parameter of 
-  * MatrixBase::selfadjointView() and MatrixBase::triangularView(). */
+  * Enum containing possible values for the \c Mode or \c UpLo parameter of
+  * MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */
 enum {
  /** View matrix as a lower triangular matrix. */
  Lower=0x1,                      
@@ -484,6 +492,9 @@ struct Dense {};
 /** The type used to identify a general sparse storage. */
 struct Sparse {};

+/** The type used to identify a general solver (foctored) storage. */
+struct SolverStorage {};
+
 /** The type used to identify a permutation storage. */
 struct PermutationStorage {};

@@ -498,6 +509,7 @@ struct ArrayXpr {};

 // An evaluator must define its shape. By default, it can be one of the following:
 struct DenseShape             { static std::string debugName() { return "DenseShape"; } };
+struct SolverShape            { static std::string debugName() { return "SolverShape"; } };
 struct HomogeneousShape       { static std::string debugName() { return "HomogeneousShape"; } };
 struct DiagonalShape          { static std::string debugName() { return "DiagonalShape"; } };
 struct BandShape              { static std::string debugName() { return "BandShape"; } };
@@ -523,7 +535,9 @@ enum ComparisonName {
  cmp_LT = 1,
  cmp_LE = 2,
  cmp_UNORD = 3,
-  cmp_NEQ = 4
+  cmp_NEQ = 4,
+  cmp_GT = 5,
+  cmp_GE = 6
 };
 } // end namespace internal

--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -10,6 +10,7 @@
  // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
  // 4273 - QtAlignedMalloc, inconsistent DLL linkage
  // 4324 - structure was padded due to declspec(align())
+  // 4503 - decorated name length exceeded, name was truncated
  // 4512 - assignment operator could not be generated
  // 4522 - 'class' : multiple assignment operators specified
  // 4700 - uninitialized local variable 'xyz' used
@@ -17,17 +18,19 @@
  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
    #pragma warning( push )
  #endif
-  #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4512 4522 4700 4717 )
+  #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 )
 #elif defined __INTEL_COMPILER
  // 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
  //        ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
  //        typedef that may be a reference type.
  // 279  - controlling expression is constant
  //        ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
+  // 1684 - conversion from pointer to same-sized integral type (potential portability problem)
+  // 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits
  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
    #pragma warning push
  #endif
-  #pragma warning disable 2196 279
+  #pragma warning disable 2196 279 1684 2259
 #elif defined __clang__
  // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
  //     this is really a stupid warning as it warns on compile-time expressions involving enums
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -132,6 +132,7 @@ template<typename MatrixType> struct CommaInitializer;
 template<typename Derived> class ReturnByValue;
 template<typename ExpressionType> class ArrayWrapper;
 template<typename ExpressionType> class MatrixWrapper;
+template<typename Derived> class SolverBase;
 template<typename XprType> class InnerIterator;

 namespace internal {
@@ -160,8 +161,7 @@ template< typename T,
          typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
          typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
-          typename RhsScalar = typename traits<typename T::Rhs>::Scalar,
-          typename = EnableIf<true> // extra template parameter for SFINAE-based specialization
+          typename RhsScalar = typename traits<typename T::Rhs>::Scalar
        > struct product_evaluator;
 }

@@ -209,6 +209,7 @@ template<typename Scalar> struct scalar_random_op;
 template<typename Scalar> struct scalar_add_op;
 template<typename Scalar> struct scalar_constant_op;
 template<typename Scalar> struct scalar_identity_op;
+template<typename Scalar,bool iscpx> struct scalar_sign_op;

 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
 template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
@@ -266,7 +267,6 @@ template<typename Scalar> class Rotation2D;
 template<typename Scalar> class AngleAxis;
 template<typename Scalar,int Dim> class Translation;
 template<typename Scalar,int Dim> class AlignedBox;
-
 template<typename Scalar, int Options = AutoAlign> class Quaternion;
 template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
 template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
@@ -274,6 +274,9 @@ template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperp
 template<typename Scalar> class UniformScaling;
 template<typename MatrixType,int Direction> class Homogeneous;

+// Sparse module:
+template<typename Derived> class SparseMatrixBase;
+
 // MatrixFunctions module
 template<typename Derived> struct MatrixExponentialReturnValue;
 template<typename Derived> class MatrixFunctionReturnValue;
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@

 #define EIGEN_WORLD_VERSION 3
 #define EIGEN_MAJOR_VERSION 2
-#define EIGEN_MINOR_VERSION 91
+#define EIGEN_MINOR_VERSION 92

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -341,6 +341,13 @@
  #define EIGEN_HAVE_RVALUE_REFERENCES
 #endif

+// Does the compiler support C99?
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901))       \
+  || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
+  || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))
+#define EIGEN_HAS_C99_MATH 1
+#endif
+
 // Does the compiler support result_of?
 #if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))
 #define EIGEN_HAS_STD_RESULT_OF 1
@@ -353,16 +360,19 @@

 // Does the compiler support const expressions?
 #ifdef __CUDACC__
-  // Const expressions are not supported regardless of what host compiler is used 
+// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above
+#if defined(__CUDACC_VER__) &&  __CUDACC_VER__ >= 70500 && __cplusplus > 199711L
+  #define EIGEN_HAS_CONSTEXPR 1
+#endif
 #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \
-    EIGEN_GNUC_AT_LEAST(4,9)
+    EIGEN_GNUC_AT_LEAST(4,8)
 #define EIGEN_HAS_CONSTEXPR 1
 #endif

 // Does the compiler support C++11 math?
 // Let's be conservative and enable the default C++11 implementation only if we are sure it exists
 #ifndef EIGEN_HAS_CXX11_MATH
-  #if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC)  \
+  #if (__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC)  \
      && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)
    #define EIGEN_HAS_CXX11_MATH 1
  #else
@@ -372,17 +382,30 @@

 // Does the compiler support proper C++11 containers?
 #ifndef EIGEN_HAS_CXX11_CONTAINERS
-  #if ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG)) || EIGEN_COMP_MSVC >= 1900
+  #if    (__cplusplus > 201103L) \
+      || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+      || EIGEN_COMP_MSVC >= 1900
    #define EIGEN_HAS_CXX11_CONTAINERS 1
  #else
    #define EIGEN_HAS_CXX11_CONTAINERS 0
  #endif
 #endif

+// Does the compiler support C++11 noexcept?
+#ifndef EIGEN_HAS_CXX11_NOEXCEPT
+  #if    (__cplusplus > 201103L) \
+      || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+      || EIGEN_COMP_MSVC >= 1900
+    #define EIGEN_HAS_CXX11_NOEXCEPT 1
+  #else
+    #define EIGEN_HAS_CXX11_NOEXCEPT 0
+  #endif
+#endif
+
 /** Allows to disable some optimizations which might affect the accuracy of the result.
  * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
  * They currently include:
-  *   - single precision ArrayBase::sin() and ArrayBase::cos() when SSE vectorization is enabled.
+  *   - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization.
  */
 #ifndef EIGEN_FAST_MATH
 #define EIGEN_FAST_MATH 1
@@ -609,10 +632,14 @@ namespace Eigen {
  // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
  // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
  // certain common platform (compiler+architecture combinations) to avoid these problems.
-  // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
-  // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
-  // when we have to disable static alignment.
-  #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
+  // Only static alignment is really problematic (relies on nonstandard compiler extensions),
+  // try to keep heap alignment even when we have to disable static alignment.
+  #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
+  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+  #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
+  // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
+  // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
+  // 4.8 and newer seem definitely unaffected.
  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
  #else
  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
@@ -747,8 +774,6 @@ namespace Eigen {
 * documentation in a single line.
 **/

-// TODO The EIGEN_DENSE_PUBLIC_INTERFACE should not exists anymore
-  
 #define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
  typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \
@@ -761,17 +786,17 @@ namespace Eigen {
        Flags = Eigen::internal::traits<Derived>::Flags, \
        SizeAtCompileTime = Base::SizeAtCompileTime, \
        MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
-        IsVectorAtCompileTime = Base::IsVectorAtCompileTime };
-
-
-#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
-  EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
-  typedef typename Base::PacketScalar PacketScalar; \
-  enum { MaxRowsAtCompileTime = Eigen::internal::traits<Derived>::MaxRowsAtCompileTime, \
-        MaxColsAtCompileTime = Eigen::internal::traits<Derived>::MaxColsAtCompileTime}; \
+        IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
  using Base::derived; \
  using Base::const_cast_derived;

+
+// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed
+#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+  EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+  typedef typename Base::PacketScalar PacketScalar;
+
+
 #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
 #define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)

@@ -837,4 +862,12 @@ namespace Eigen {
 #  define EIGEN_CATCH(X) else
 #endif

+#if EIGEN_HAS_CXX11_NOEXCEPT
+#   define EIGEN_NO_THROW noexcept(true)
+#   define EIGEN_EXCEPTION_SPEC(X) noexcept(false)
+#else
+#   define EIGEN_NO_THROW throw()
+#   define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#endif
+
 #endif // EIGEN_MACROS_H
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -732,7 +732,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)

 #if EIGEN_MAX_ALIGN_BYTES!=0
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
-      void* operator new(size_t size, const std::nothrow_t&) throw() { \
+      void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        EIGEN_CATCH (...) { return 0; } \
      }
@@ -743,20 +743,20 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
      void *operator new[](size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
-      void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
-      void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
-      void operator delete(void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
-      void operator delete[](void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
      static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
-      void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
-      void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \
+      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
+      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
-      void operator delete(void *ptr, const std::nothrow_t&) throw() { \
+      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -11,6 +11,11 @@
 #ifndef EIGEN_META_H
 #define EIGEN_META_H

+#if defined(__CUDA_ARCH__)
+#include <cfloat>
+#include <math_constants.h>
+#endif
+
 namespace Eigen {

 namespace internal {
@@ -68,6 +73,18 @@ template<> struct is_arithmetic<unsigned int>  { enum { value = true }; };
 template<> struct is_arithmetic<signed long>   { enum { value = true }; };
 template<> struct is_arithmetic<unsigned long> { enum { value = true }; };

+template<typename T> struct is_integral        { enum { value = false }; };
+template<> struct is_integral<bool>            { enum { value = true }; };
+template<> struct is_integral<char>            { enum { value = true }; };
+template<> struct is_integral<signed char>     { enum { value = true }; };
+template<> struct is_integral<unsigned char>   { enum { value = true }; };
+template<> struct is_integral<signed short>    { enum { value = true }; };
+template<> struct is_integral<unsigned short>  { enum { value = true }; };
+template<> struct is_integral<signed int>      { enum { value = true }; };
+template<> struct is_integral<unsigned int>    { enum { value = true }; };
+template<> struct is_integral<signed long>     { enum { value = true }; };
+template<> struct is_integral<unsigned long>   { enum { value = true }; };
+
 template <typename T> struct add_const { typedef const T type; };
 template <typename T> struct add_const<T&> { typedef T& type; };

@@ -138,16 +155,16 @@ template<> struct numeric_limits<float>
  EIGEN_DEVICE_FUNC
  static float (max)() { return CUDART_MAX_NORMAL_F; }
  EIGEN_DEVICE_FUNC
-  static float (min)() { return __FLT_EPSILON__; }
+  static float (min)() { return FLT_MIN; }
 };
 template<> struct numeric_limits<double>
 {
  EIGEN_DEVICE_FUNC
  static double epsilon() { return __DBL_EPSILON__; }
  EIGEN_DEVICE_FUNC
-  static double (max)() { return CUDART_INF; }
+  static double (max)() { return DBL_MAX; }
  EIGEN_DEVICE_FUNC
-  static double (min)() { return __DBL_EPSILON__; }
+  static double (min)() { return DBL_MIN; }
 };
 template<> struct numeric_limits<int>
 {
@@ -158,6 +175,15 @@ template<> struct numeric_limits<int>
  EIGEN_DEVICE_FUNC
  static int (min)() { return INT_MIN; }
 };
+template<> struct numeric_limits<unsigned int>
+{
+  EIGEN_DEVICE_FUNC
+  static unsigned int epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static unsigned int (max)() { return UINT_MAX; }
+  EIGEN_DEVICE_FUNC
+  static unsigned int (min)() { return 0; }
+};
 template<> struct numeric_limits<long>
 {
  EIGEN_DEVICE_FUNC
@@ -167,6 +193,15 @@ template<> struct numeric_limits<long>
  EIGEN_DEVICE_FUNC
  static long (min)() { return LONG_MIN; }
 };
+template<> struct numeric_limits<unsigned long>
+{
+  EIGEN_DEVICE_FUNC
+  static unsigned long epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long (max)() { return ULONG_MAX; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long (min)() { return 0; }
+};
 template<> struct numeric_limits<long long>
 {
  EIGEN_DEVICE_FUNC
@@ -176,6 +211,15 @@ template<> struct numeric_limits<long long>
  EIGEN_DEVICE_FUNC
  static long long (min)() { return LLONG_MIN; }
 };
+template<> struct numeric_limits<unsigned long long>
+{
+  EIGEN_DEVICE_FUNC
+  static unsigned long long epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long long (max)() { return ULLONG_MAX; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long long (min)() { return 0; }
+};

 }

@@ -193,7 +237,6 @@ protected:
  EIGEN_DEVICE_FUNC ~noncopyable() {}
 };

-
 /** \internal
  * Convenient struct to get the result type of a unary or binary functor.
  *
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -93,7 +93,11 @@
        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
-        STORAGE_LAYOUT_DOES_NOT_MATCH
+        STORAGE_LAYOUT_DOES_NOT_MATCH,
+        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
+        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
+        MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
+        THIS_TYPE_IS_NOT_SUPPORTED
      };
    };

@@ -200,5 +204,9 @@
                                            >::value), \
                          YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)

+// Check that a cost value is positive, and that is stay within a reasonable range
+// TODO this check could be enabled for internal debugging only
+#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
+      EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);

 #endif // EIGEN_STATIC_ASSERT_H
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -233,33 +233,33 @@ template<typename XprType> struct size_of_xpr_at_compile_time
 */

 template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;
-template<typename T, typename BaseClassType> struct plain_matrix_type_dense;
+template<typename T, typename BaseClassType, int Flags> struct plain_matrix_type_dense;
 template<typename T> struct plain_matrix_type<T,Dense>
 {
-  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind>::type type;
+  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, traits<T>::Flags>::type type;
 };
 template<typename T> struct plain_matrix_type<T,DiagonalShape>
 {
  typedef typename T::PlainObject type;
 };

-template<typename T> struct plain_matrix_type_dense<T,MatrixXpr>
+template<typename T, int Flags> struct plain_matrix_type_dense<T,MatrixXpr,Flags>
 {
  typedef Matrix<typename traits<T>::Scalar,
                traits<T>::RowsAtCompileTime,
                traits<T>::ColsAtCompileTime,
-                AutoAlign | (traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
                traits<T>::MaxRowsAtCompileTime,
                traits<T>::MaxColsAtCompileTime
          > type;
 };

-template<typename T> struct plain_matrix_type_dense<T,ArrayXpr>
+template<typename T, int Flags> struct plain_matrix_type_dense<T,ArrayXpr,Flags>
 {
  typedef Array<typename traits<T>::Scalar,
                traits<T>::RowsAtCompileTime,
                traits<T>::ColsAtCompileTime,
-                AutoAlign | (traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
                traits<T>::MaxRowsAtCompileTime,
                traits<T>::MaxColsAtCompileTime
          > type;
@@ -303,6 +303,15 @@ struct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
 };


+/* similar to plain_matrix_type, but using the evaluator's Flags */
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_object_eval;
+
+template<typename T>
+struct plain_object_eval<T,Dense>
+{
+  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, evaluator<T>::Flags>::type type;
+};
+

 /* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major
 */
@@ -385,29 +394,23 @@ struct transfer_constness
  * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
  * \param PlainObject the type of the temporary if needed.
  */
-template<typename T, int n, typename PlainObject = typename eval<T>::type> struct nested_eval
+template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
 {
  enum {
-    // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values.
-    // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values.
-    // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues
-    // (poor choice of temporaries).
-    // It's important that this value can still be squared without integer overflowing.
-    DynamicAsInteger = 10000,
    ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
-    ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost),
-    CoeffReadCost = evaluator<T>::CoeffReadCost,  // TODO What if an evaluator evaluate itself into a tempory?
-                                                  // Then CoeffReadCost will be small but we still have to evaluate if n>1... 
-                                                  // The solution might be to ask the evaluator if it creates a temp. Perhaps we could even ask the number of temps?
-    CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost),
-    NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n,
-    CostEvalAsInteger   = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger,
-    CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger
+    CoeffReadCost = evaluator<T>::CoeffReadCost,  // NOTE What if an evaluator evaluate itself into a tempory?
+                                                  //      Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1.
+                                                  //      This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON
+                                                  //      for all evaluator creating a temporary. This flag is then propagated by the parent evaluators.
+                                                  //      Another solution could be to count the number of temps?
+    NAsInteger = n == Dynamic ? HugeCost : n,
+    CostEval   = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
+    CostNoEval = NAsInteger * CoeffReadCost
  };

  typedef typename conditional<
        ( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
-          (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ),
+          (int(CostEval) < int(CostNoEval)) ),
        PlainObject,
        typename ref_selector<T>::type
  >::type type;
@@ -449,9 +452,9 @@ struct generic_xpr_base<Derived, XprKind, Dense>

 /** \internal Helper base class to add a scalar multiple operator
  * overloads for complex types */
-template<typename Derived,typename Scalar,typename OtherScalar,
+template<typename Derived, typename Scalar, typename OtherScalar, typename BaseType,
         bool EnableIt = !is_same<Scalar,OtherScalar>::value >
-struct special_scalar_op_base : public DenseCoeffsBase<Derived>
+struct special_scalar_op_base : public BaseType
 {
  // dummy operator* so that the
  // "using special_scalar_op_base::operator*" compiles
@@ -460,8 +463,8 @@ struct special_scalar_op_base : public DenseCoeffsBase<Derived>
  void operator/(dummy) const;
 };

-template<typename Derived,typename Scalar,typename OtherScalar>
-struct special_scalar_op_base<Derived,Scalar,OtherScalar,true>  : public DenseCoeffsBase<Derived>
+template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType>
+struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true>  : public BaseType
 {
  const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
  operator*(const OtherScalar& scalar) const
@@ -654,6 +657,43 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces
  return false;
 }

+template<typename T, typename U> struct is_same_or_void { enum { value = is_same<T,U>::value }; };
+template<typename T> struct is_same_or_void<void,T>     { enum { value = 1 }; };
+template<typename T> struct is_same_or_void<T,void>     { enum { value = 1 }; };
+template<>           struct is_same_or_void<void,void>  { enum { value = 1 }; };
+
+#ifdef EIGEN_DEBUG_ASSIGN
+std::string demangle_traversal(int t)
+{
+  if(t==DefaultTraversal) return "DefaultTraversal";
+  if(t==LinearTraversal) return "LinearTraversal";
+  if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal";
+  if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal";
+  if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal";
+  return "?";
+}
+std::string demangle_unrolling(int t)
+{
+  if(t==NoUnrolling) return "NoUnrolling";
+  if(t==InnerUnrolling) return "InnerUnrolling";
+  if(t==CompleteUnrolling) return "CompleteUnrolling";
+  return "?";
+}
+std::string demangle_flags(int f)
+{
+  std::string res;
+  if(f&RowMajorBit)                 res += " | RowMajor";
+  if(f&PacketAccessBit)             res += " | Packet";
+  if(f&LinearAccessBit)             res += " | Linear";
+  if(f&LvalueBit)                   res += " | Lvalue";
+  if(f&DirectAccessBit)             res += " | Direct";
+  if(f&NestByRefBit)                res += " | NestByRef";
+  if(f&NoPreferredStorageOrderBit)  res += " | NoPreferredStorageOrderBit";
+  
+  return res;
+}
+#endif
+
 } // end namespace internal

 // we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
@@ -666,7 +706,7 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces
 #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
  EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
                        ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
-                        : int(internal::is_same<LHS, RHS>::value)), \
+                        : int(internal::is_same_or_void<LHS, RHS>::value)), \
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    
 } // end namespace Eigen
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -122,7 +122,8 @@ template<typename _MatrixType> class ComplexEigenSolver
      *
      * This constructor calls compute() to compute the eigendecomposition.
      */
-    explicit ComplexEigenSolver(const MatrixType& matrix, bool computeEigenvectors = true)
+    template<typename InputType>
+    explicit ComplexEigenSolver(const EigenBase<InputType>& matrix, bool computeEigenvectors = true)
            : m_eivec(matrix.rows(),matrix.cols()),
              m_eivalues(matrix.cols()),
              m_schur(matrix.rows()),
@@ -130,7 +131,7 @@ template<typename _MatrixType> class ComplexEigenSolver
              m_eigenvectorsOk(false),
              m_matX(matrix.rows(),matrix.cols())
    {
-      compute(matrix, computeEigenvectors);
+      compute(matrix.derived(), computeEigenvectors);
    }

    /** \brief Returns the eigenvectors of given matrix.
@@ -208,7 +209,8 @@ template<typename _MatrixType> class ComplexEigenSolver
      * Example: \include ComplexEigenSolver_compute.cpp
      * Output: \verbinclude ComplexEigenSolver_compute.out
      */
-    ComplexEigenSolver& compute(const MatrixType& matrix, bool computeEigenvectors = true);
+    template<typename InputType>
+    ComplexEigenSolver& compute(const EigenBase<InputType>& matrix, bool computeEigenvectors = true);

    /** \brief Reports whether previous computation was successful.
      *
@@ -254,8 +256,9 @@ template<typename _MatrixType> class ComplexEigenSolver


 template<typename MatrixType>
+template<typename InputType>
 ComplexEigenSolver<MatrixType>& 
-ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
+ComplexEigenSolver<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeEigenvectors)
 {
  check_template_parameters();
  
@@ -264,13 +267,13 @@ ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEi

  // Do a complex Schur decomposition, A = U T U^*
  // The eigenvalues are on the diagonal of T.
-  m_schur.compute(matrix, computeEigenvectors);
+  m_schur.compute(matrix.derived(), computeEigenvectors);

  if(m_schur.info() == Success)
  {
    m_eivalues = m_schur.matrixT().diagonal();
    if(computeEigenvectors)
-      doComputeEigenvectors(matrix.norm());
+      doComputeEigenvectors(m_schur.matrixT().norm());
    sortEigenvalues(computeEigenvectors);
  }

--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -109,7 +109,8 @@ template<typename _MatrixType> class ComplexSchur
      *
      * \sa matrixT() and matrixU() for examples.
      */
-    explicit ComplexSchur(const MatrixType& matrix, bool computeU = true)
+    template<typename InputType>
+    explicit ComplexSchur(const EigenBase<InputType>& matrix, bool computeU = true)
      : m_matT(matrix.rows(),matrix.cols()),
        m_matU(matrix.rows(),matrix.cols()),
        m_hess(matrix.rows()),
@@ -117,7 +118,7 @@ template<typename _MatrixType> class ComplexSchur
        m_matUisUptodate(false),
        m_maxIters(-1)
    {
-      compute(matrix, computeU);
+      compute(matrix.derived(), computeU);
    }

    /** \brief Returns the unitary matrix in the Schur decomposition. 
@@ -186,7 +187,8 @@ template<typename _MatrixType> class ComplexSchur
      *
      * \sa compute(const MatrixType&, bool, Index)
      */
-    ComplexSchur& compute(const MatrixType& matrix, bool computeU = true);
+    template<typename InputType>
+    ComplexSchur& compute(const EigenBase<InputType>& matrix, bool computeU = true);
    
    /** \brief Compute Schur decomposition from a given Hessenberg matrix
     *  \param[in] matrixH Matrix in Hessenberg form H
@@ -313,14 +315,15 @@ typename ComplexSchur<MatrixType>::ComplexScalar ComplexSchur<MatrixType>::compu


 template<typename MatrixType>
-ComplexSchur<MatrixType>& ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool computeU)
+template<typename InputType>
+ComplexSchur<MatrixType>& ComplexSchur<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeU)
 {
  m_matUisUptodate = false;
  eigen_assert(matrix.cols() == matrix.rows());

  if(matrix.cols() == 1)
  {
-    m_matT = matrix.template cast<ComplexScalar>();
+    m_matT = matrix.derived().template cast<ComplexScalar>();
    if(computeU)  m_matU = ComplexMatrixType::Identity(1,1);
    m_info = Success;
    m_isInitialized = true;
@@ -328,7 +331,7 @@ ComplexSchur<MatrixType>& ComplexSchur<MatrixType>::compute(const MatrixType& ma
    return *this;
  }

-  internal::complex_schur_reduce_to_hessenberg<MatrixType, NumTraits<Scalar>::IsComplex>::run(*this, matrix, computeU);
+  internal::complex_schur_reduce_to_hessenberg<MatrixType, NumTraits<Scalar>::IsComplex>::run(*this, matrix.derived(), computeU);
  computeFromHessenberg(m_matT, m_matU, computeU);
  return *this;
 }
--- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
@@ -40,9 +40,9 @@ namespace Eigen {
 /** \internal Specialization for the data types supported by MKL */

 #define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \
-template<> inline \
+template<> template<typename InputType> inline \
 ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
-ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
+ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \
 { \
  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \
  typedef MatrixType::RealScalar RealScalar; \
@@ -53,7 +53,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
  m_matUisUptodate = false; \
  if(matrix.cols() == 1) \
  { \
-    m_matT = matrix.cast<ComplexScalar>(); \
+    m_matT = matrix.derived().template cast<ComplexScalar>(); \
    if(computeU)  m_matU = ComplexMatrixType::Identity(1,1); \
      m_info = Success; \
      m_isInitialized = true; \
@@ -61,7 +61,6 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
      return *this; \
  } \
  lapack_int n = matrix.cols(), sdim, info; \
-  lapack_int lda = matrix.outerStride(); \
  lapack_int matrix_order = MKLCOLROW; \
  char jobvs, sort='N'; \
  LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \
@@ -69,6 +68,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
  m_matU.resize(n, n); \
  lapack_int ldvs  = m_matU.outerStride(); \
  m_matT = matrix; \
+  lapack_int lda = m_matT.outerStride(); \
  Matrix<EIGTYPE, Dynamic, Dynamic> w; \
  w.resize(n, 1);\
  info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -110,7 +110,7 @@ template<typename _MatrixType> class EigenSolver
      *
      * \sa compute() for an example.
      */
- EigenSolver() : m_eivec(), m_eivalues(), m_isInitialized(false), m_realSchur(), m_matT(), m_tmp() {}
+    EigenSolver() : m_eivec(), m_eivalues(), m_isInitialized(false), m_realSchur(), m_matT(), m_tmp() {}

    /** \brief Default constructor with memory preallocation
      *
@@ -143,7 +143,8 @@ template<typename _MatrixType> class EigenSolver
      *
      * \sa compute()
      */
-    explicit EigenSolver(const MatrixType& matrix, bool computeEigenvectors = true)
+    template<typename InputType>
+    explicit EigenSolver(const EigenBase<InputType>& matrix, bool computeEigenvectors = true)
      : m_eivec(matrix.rows(), matrix.cols()),
        m_eivalues(matrix.cols()),
        m_isInitialized(false),
@@ -152,7 +153,7 @@ template<typename _MatrixType> class EigenSolver
        m_matT(matrix.rows(), matrix.cols()), 
        m_tmp(matrix.cols())
    {
-      compute(matrix, computeEigenvectors);
+      compute(matrix.derived(), computeEigenvectors);
    }

    /** \brief Returns the eigenvectors of given matrix. 
@@ -273,7 +274,8 @@ template<typename _MatrixType> class EigenSolver
      * Example: \include EigenSolver_compute.cpp
      * Output: \verbinclude EigenSolver_compute.out
      */
-    EigenSolver& compute(const MatrixType& matrix, bool computeEigenvectors = true);
+    template<typename InputType>
+    EigenSolver& compute(const EigenBase<InputType>& matrix, bool computeEigenvectors = true);

    /** \returns NumericalIssue if the input contains INF or NaN values or overflow occured. Returns Success otherwise. */
    ComputationInfo info() const
@@ -370,8 +372,9 @@ typename EigenSolver<MatrixType>::EigenvectorsType EigenSolver<MatrixType>::eige
 }

 template<typename MatrixType>
+template<typename InputType>
 EigenSolver<MatrixType>& 
-EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
+EigenSolver<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeEigenvectors)
 {
  check_template_parameters();
  
@@ -381,7 +384,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
  eigen_assert(matrix.cols() == matrix.rows());

  // Reduce to real Schur form.
-  m_realSchur.compute(matrix, computeEigenvectors);
+  m_realSchur.compute(matrix.derived(), computeEigenvectors);
  
  m_info = m_realSchur.info();

--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -145,7 +145,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
      *
      * \sa compute()
      */
-    explicit GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)
+    GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)
      : m_eivec(A.rows(), A.cols()),
        m_alphas(A.cols()),
        m_betas(A.cols()),
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -115,8 +115,9 @@ template<typename _MatrixType> class HessenbergDecomposition
      *
      * \sa matrixH() for an example.
      */
-    explicit HessenbergDecomposition(const MatrixType& matrix)
-      : m_matrix(matrix),
+    template<typename InputType>
+    explicit HessenbergDecomposition(const EigenBase<InputType>& matrix)
+      : m_matrix(matrix.derived()),
        m_temp(matrix.rows()),
        m_isInitialized(false)
    {
@@ -147,9 +148,10 @@ template<typename _MatrixType> class HessenbergDecomposition
      * Example: \include HessenbergDecomposition_compute.cpp
      * Output: \verbinclude HessenbergDecomposition_compute.out
      */
-    HessenbergDecomposition& compute(const MatrixType& matrix)
+    template<typename InputType>
+    HessenbergDecomposition& compute(const EigenBase<InputType>& matrix)
    {
-      m_matrix = matrix;
+      m_matrix = matrix.derived();
      if(matrix.rows()<2)
      {
        m_isInitialized = true;
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -101,7 +101,7 @@ namespace Eigen {
       *
       * This constructor calls compute() to compute the QZ decomposition.
       */
-      explicit RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
+      RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
        m_S(A.rows(),A.cols()),
        m_T(A.rows(),A.cols()),
        m_Q(A.rows(),A.cols()),
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -100,7 +100,8 @@ template<typename _MatrixType> class RealSchur
      * Example: \include RealSchur_RealSchur_MatrixType.cpp
      * Output: \verbinclude RealSchur_RealSchur_MatrixType.out
      */
-    explicit RealSchur(const MatrixType& matrix, bool computeU = true)
+    template<typename InputType>
+    explicit RealSchur(const EigenBase<InputType>& matrix, bool computeU = true)
            : m_matT(matrix.rows(),matrix.cols()),
              m_matU(matrix.rows(),matrix.cols()),
              m_workspaceVector(matrix.rows()),
@@ -109,7 +110,7 @@ template<typename _MatrixType> class RealSchur
              m_matUisUptodate(false),
              m_maxIters(-1)
    {
-      compute(matrix, computeU);
+      compute(matrix.derived(), computeU);
    }

    /** \brief Returns the orthogonal matrix in the Schur decomposition. 
@@ -165,7 +166,8 @@ template<typename _MatrixType> class RealSchur
      *
      * \sa compute(const MatrixType&, bool, Index)
      */
-    RealSchur& compute(const MatrixType& matrix, bool computeU = true);
+    template<typename InputType>
+    RealSchur& compute(const EigenBase<InputType>& matrix, bool computeU = true);

    /** \brief Computes Schur decomposition of a Hessenberg matrix H = Z T Z^T
     *  \param[in] matrixH Matrix in Hessenberg form H
@@ -243,7 +245,8 @@ template<typename _MatrixType> class RealSchur


 template<typename MatrixType>
-RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const MatrixType& matrix, bool computeU)
+template<typename InputType>
+RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeU)
 {
  eigen_assert(matrix.cols() == matrix.rows());
  Index maxIters = m_maxIters;
@@ -251,7 +254,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const MatrixType& matrix,
    maxIters = m_maxIterationsPerRow * matrix.rows();

  // Step 1. Reduce to Hessenberg form
-  m_hess.compute(matrix);
+  m_hess.compute(matrix.derived());

  // Step 2. Reduce to real Schur form  
  computeFromHessenberg(m_hess.matrixH(), m_hess.matrixQ(), computeU);
--- a/Eigen/src/Eigenvalues/RealSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h
@@ -40,14 +40,13 @@ namespace Eigen {
 /** \internal Specialization for the data types supported by MKL */

 #define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \
-template<> inline \
+template<> template<typename InputType> inline \
 RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
-RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \
+RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \
 { \
  eigen_assert(matrix.cols() == matrix.rows()); \
 \
  lapack_int n = matrix.cols(), sdim, info; \
-  lapack_int lda = matrix.outerStride(); \
  lapack_int matrix_order = MKLCOLROW; \
  char jobvs, sort='N'; \
  LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \
@@ -55,6 +54,7 @@ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<E
  m_matU.resize(n, n); \
  lapack_int ldvs  = m_matU.outerStride(); \
  m_matT = matrix; \
+  lapack_int lda = m_matT.outerStride(); \
  Matrix<EIGTYPE, Dynamic, Dynamic> wr, wi; \
  wr.resize(n, 1); wi.resize(n, 1); \
  info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -157,14 +157,15 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      *
      * \sa compute(const MatrixType&, int)
      */
+    template<typename InputType>
    EIGEN_DEVICE_FUNC
-    explicit SelfAdjointEigenSolver(const MatrixType& matrix, int options = ComputeEigenvectors)
+    explicit SelfAdjointEigenSolver(const EigenBase<InputType>& matrix, int options = ComputeEigenvectors)
      : m_eivec(matrix.rows(), matrix.cols()),
        m_eivalues(matrix.cols()),
        m_subdiag(matrix.rows() > 1 ? matrix.rows() - 1 : 1),
        m_isInitialized(false)
    {
-      compute(matrix, options);
+      compute(matrix.derived(), options);
    }

    /** \brief Computes eigendecomposition of given matrix.
@@ -197,8 +198,9 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      *
      * \sa SelfAdjointEigenSolver(const MatrixType&, int)
      */
+    template<typename InputType>
    EIGEN_DEVICE_FUNC
-    SelfAdjointEigenSolver& compute(const MatrixType& matrix, int options = ComputeEigenvectors);
+    SelfAdjointEigenSolver& compute(const EigenBase<InputType>& matrix, int options = ComputeEigenvectors);
    
    /** \brief Computes eigendecomposition of given matrix using a closed-form algorithm
      *
@@ -389,12 +391,15 @@ static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index sta
 }

 template<typename MatrixType>
+template<typename InputType>
 EIGEN_DEVICE_FUNC
 SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
-::compute(const MatrixType& matrix, int options)
+::compute(const EigenBase<InputType>& a_matrix, int options)
 {
  check_template_parameters();
  
+  const InputType &matrix(a_matrix.derived());
+  
  using std::abs;
  eigen_assert(matrix.cols() == matrix.rows());
  eigen_assert((options&~(EigVecMask|GenEigMask))==0
@@ -406,7 +411,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>

  if(n==1)
  {
-    m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0));
+    m_eivalues.coeffRef(0,0) = numext::real(matrix(0,0));
    if(computeEigenvectors)
      m_eivec.setOnes(n,n);
    m_info = Success;
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h
@@ -40,9 +40,9 @@ namespace Eigen {
 /** \internal Specialization for the data types supported by MKL */

 #define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \
-template<> inline \
+template<> template<typename InputType> inline \
 SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
-SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, int options) \
+SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
 { \
  eigen_assert(matrix.cols() == matrix.rows()); \
  eigen_assert((options&~(EigVecMask|GenEigMask))==0 \
@@ -56,7 +56,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
 \
  if(n==1) \
  { \
-    m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); \
+    m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); \
    if(computeEigenvectors) m_eivec.setOnes(n,n); \
    m_info = Success; \
    m_isInitialized = true; \
@@ -64,7 +64,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
    return *this; \
  } \
 \
-  lda = matrix.outerStride(); \
+  lda = m_eivec.outerStride(); \
  matrix_order=MKLCOLROW; \
  char jobz, uplo='L'/*, range='A'*/; \
  jobz = computeEigenvectors ? 'V' : 'N'; \
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -126,8 +126,9 @@ template<typename _MatrixType> class Tridiagonalization
      * Example: \include Tridiagonalization_Tridiagonalization_MatrixType.cpp
      * Output: \verbinclude Tridiagonalization_Tridiagonalization_MatrixType.out
      */
-    explicit Tridiagonalization(const MatrixType& matrix)
-      : m_matrix(matrix),
+    template<typename InputType>
+    explicit Tridiagonalization(const EigenBase<InputType>& matrix)
+      : m_matrix(matrix.derived()),
        m_hCoeffs(matrix.cols() > 1 ? matrix.cols()-1 : 1),
        m_isInitialized(false)
    {
@@ -152,9 +153,10 @@ template<typename _MatrixType> class Tridiagonalization
      * Example: \include Tridiagonalization_compute.cpp
      * Output: \verbinclude Tridiagonalization_compute.out
      */
-    Tridiagonalization& compute(const MatrixType& matrix)
+    template<typename InputType>
+    Tridiagonalization& compute(const EigenBase<InputType>& matrix)
    {
-      m_matrix = matrix;
+      m_matrix = matrix.derived();
      m_hCoeffs.resize(matrix.rows()-1, 1);
      internal::tridiagonalization_inplace(m_matrix, m_hCoeffs);
      m_isInitialized = true;
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -163,7 +163,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
   * a uniform distribution */
  inline VectorType sample() const
  {
-    VectorType r;
+    VectorType r(dim());
    for(Index d=0; d<dim(); ++d)
    {
      if(!ScalarTraits::IsInteger)
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -85,10 +85,17 @@ public:
  template<typename Derived>
  inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }

+  /** \returns the value of the rotation angle in radian */
  Scalar angle() const { return m_angle; }
+  /** \returns a read-write reference to the stored angle in radian */
  Scalar& angle() { return m_angle; }

+  /** \returns the rotation axis */
  const Vector3& axis() const { return m_axis; }
+  /** \returns a read-write reference to the stored rotation axis.
+    *
+    * \warning The rotation axis must remain a \b unit vector.
+    */
  Vector3& axis() { return m_axis; }

  /** Concatenates two rotations */
@@ -133,7 +140,7 @@ public:
    m_angle = Scalar(other.angle());
  }

-  static inline const AngleAxis Identity() { return AngleAxis(0, Vector3::UnitX()); }
+  static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }

  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
    * determined by \a prec.
@@ -170,8 +177,8 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived
  }
  else
  {
-    m_angle = 0;
-    m_axis << 1, 0, 0;
+    m_angle = Scalar(0);
+    m_axis << Scalar(1), Scalar(0), Scalar(0);
  }
  return *this;
 }
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -445,6 +445,11 @@ struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsA
  }
 };

+template<typename ExpressionType, int Side, bool Transposed>
+struct permutation_matrix_product<ExpressionType, Side, Transposed, HomogeneousShape>
+  : public permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
+{};
+
 } // end namespace internal

 } // end namespace Eigen
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -278,6 +278,10 @@ public:
  inline const Coefficients& coeffs() const { return m_coeffs;}

  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsAlignment)
+  
+#ifdef EIGEN_QUATERNION_PLUGIN
+# include EIGEN_QUATERNION_PLUGIN
+#endif

 protected:
  Coefficients m_coeffs;
@@ -735,8 +739,9 @@ template<typename Other>
 struct quaternionbase_assign_impl<Other,3,3>
 {
  typedef typename Other::Scalar Scalar;
-  template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& mat)
+  template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
  {
+    const typename internal::nested_eval<Other,2>::type mat(a_mat);
    using std::sqrt;
    // This algorithm comes from  "Quaternion Calculus and Fast Animation",
    // Ken Shoemake, 1987 SIGGRAPH course notes
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -64,6 +64,16 @@ public:
  /** Default constructor wihtout initialization. The represented rotation is undefined. */
  Rotation2D() {}

+  /** Construct a 2D rotation from a 2x2 rotation matrix \a mat.
+    *
+    * \sa fromRotationMatrix()
+    */
+  template<typename Derived>
+  explicit Rotation2D(const MatrixBase<Derived>& m)
+  {
+    fromRotationMatrix(m.derived());
+  }
+
  /** \returns the rotation angle */
  inline Scalar angle() const { return m_angle; }

@@ -103,6 +113,17 @@ public:
  Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
  Matrix2 toRotationMatrix() const;

+  /** Set \c *this from a 2x2 rotation matrix \a mat.
+    * In other words, this function extract the rotation angle from the rotation matrix.
+    *
+    * This method is an alias for fromRotationMatrix()
+    *
+    * \sa fromRotationMatrix()
+    */
+  template<typename Derived>
+  Rotation2D& operator=(const  MatrixBase<Derived>& m)
+  { return fromRotationMatrix(m.derived()); }
+
  /** \returns the spherical interpolation between \c *this and \a other using
    * parameter \a t. It is in fact equivalent to a linear interpolation.
    */
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -118,15 +118,15 @@ template<int Mode> struct transform_make_affine;
  *
  * However, unlike a plain matrix, the Transform class provides many features
  * simplifying both its assembly and usage. In particular, it can be composed
-  * with any other transformations (Transform,Translation,RotationBase,Matrix)
+  * with any other transformations (Transform,Translation,RotationBase,DiagonalMatrix)
  * and can be directly used to transform implicit homogeneous vectors. All these
  * operations are handled via the operator*. For the composition of transformations,
  * its principle consists to first convert the right/left hand sides of the product
  * to a compatible (Dim+1)^2 matrix and then perform a pure matrix product.
  * Of course, internally, operator* tries to perform the minimal number of operations
  * according to the nature of each terms. Likewise, when applying the transform
-  * to non homogeneous vectors, the latters are automatically promoted to homogeneous
-  * one before doing the matrix product. The convertions to homogeneous representations
+  * to points, the latters are automatically promoted to homogeneous vectors
+  * before doing the matrix product. The conventions to homogeneous representations
  * are performed as follow:
  *
  * \b Translation t (Dim)x(1):
@@ -140,7 +140,7 @@ template<int Mode> struct transform_make_affine;
  * R & 0\\
  * 0\,...\,0 & 1
  * \end{array} \right) \f$
-  *
+  *<!--
  * \b Linear \b Matrix L (Dim)x(Dim):
  * \f$ \left( \begin{array}{cc}
  * L & 0\\
@@ -152,14 +152,20 @@ template<int Mode> struct transform_make_affine;
  * A\\
  * 0\,...\,0\,1
  * \end{array} \right) \f$
+  *-->
+  * \b Scaling \b DiagonalMatrix S (Dim)x(Dim):
+  * \f$ \left( \begin{array}{cc}
+  * S & 0\\
+  * 0\,...\,0 & 1
+  * \end{array} \right) \f$
  *
-  * \b Column \b vector v (Dim)x(1):
+  * \b Column \b point v (Dim)x(1):
  * \f$ \left( \begin{array}{c}
  * v\\
  * 1
  * \end{array} \right) \f$
  *
-  * \b Set \b of \b column \b vectors V1...Vn (Dim)x(n):
+  * \b Set \b of \b column \b points V1...Vn (Dim)x(n):
  * \f$ \left( \begin{array}{ccc}
  * v_1 & ... & v_n\\
  * 1 & ... & 1
@@ -404,26 +410,39 @@ public:
  /** \returns a writable expression of the translation vector of the transformation */
  inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }

-  /** \returns an expression of the product between the transform \c *this and a matrix expression \a other
+  /** \returns an expression of the product between the transform \c *this and a matrix expression \a other.
    *
-    * The right hand side \a other might be either:
-    * \li a vector of size Dim,
+    * The right-hand-side \a other can be either:
    * \li an homogeneous vector of size Dim+1,
-    * \li a set of vectors of size Dim x Dynamic,
-    * \li a set of homogeneous vectors of size Dim+1 x Dynamic,
-    * \li a linear transformation matrix of size Dim x Dim,
-    * \li an affine transformation matrix of size Dim x Dim+1,
+    * \li a set of homogeneous vectors of size Dim+1 x N,
    * \li a transformation matrix of size Dim+1 x Dim+1.
+    *
+    * Moreover, if \c *this represents an affine transformation (i.e., Mode!=Projective), then \a other can also be:
+    * \li a point of size Dim (computes: \code this->linear() * other + this->translation()\endcode),
+    * \li a set of N points as a Dim x N matrix (computes: \code (this->linear() * other).colwise() + this->translation()\endcode),
+    *
+    * In all cases, the return type is a matrix or vector of same sizes as the right-hand-side \a other.
+    *
+    * If you want to interpret \a other as a linear or affine transformation, then first convert it to a Transform<> type,
+    * or do your own cooking.
+    *
+    * Finally, if you want to apply Affine transformations to vectors, then explicitly apply the linear part only:
+    * \code
+    * Affine3f A;
+    * Vector3f v1, v2;
+    * v2 = A.linear() * v1;
+    * \endcode
+    *
    */
  // note: this function is defined here because some compilers cannot find the respective declaration
  template<typename OtherDerived>
-  EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
+  EIGEN_STRONG_INLINE const typename OtherDerived::PlainObject
  operator * (const EigenBase<OtherDerived> &other) const
  { return internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }

  /** \returns the product expression of a transformation matrix \a a times a transform \a b
    *
-    * The left hand side \a other might be either:
+    * The left hand side \a other can be either:
    * \li a linear transformation matrix of size Dim x Dim,
    * \li an affine transformation matrix of size Dim x Dim+1,
    * \li a general transformation matrix of size Dim+1 x Dim+1.
--- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
+++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
@@ -23,6 +23,8 @@ namespace Eigen {
  *
  * \tparam _Scalar the type of the scalar.
  *
+  * \implsparsesolverconcept
+  *
  * This preconditioner is suitable for both selfadjoint and general problems.
  * The diagonal entries are pre-inverted and stored into a dense vector.
  *
@@ -37,8 +39,10 @@ class DiagonalPreconditioner
    typedef Matrix<Scalar,Dynamic,1> Vector;
  public:
    typedef typename Vector::StorageIndex StorageIndex;
-    // this typedef is only to export the scalar type and compile-time dimensions to solve_retval
-    typedef Matrix<Scalar,Dynamic,Dynamic> MatrixType;
+    enum {
+      ColsAtCompileTime = Dynamic,
+      MaxColsAtCompileTime = Dynamic
+    };

    DiagonalPreconditioner() : m_isInitialized(false) {}

@@ -114,6 +118,8 @@ class DiagonalPreconditioner
  *
  * \tparam _Scalar the type of the scalar.
  *
+  * \implsparsesolverconcept
+  *
  * The diagonal entries are pre-inverted and stored into a dense vector.
  * 
  * \sa class LeastSquaresConjugateGradient, class DiagonalPreconditioner
@@ -172,6 +178,8 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>
 /** \ingroup IterativeLinearSolvers_Module
  * \brief A naive preconditioner which approximates any matrix as the identity matrix
  *
+  * \implsparsesolverconcept
+  *
  * \sa class DiagonalPreconditioner
  */
 class IdentityPreconditioner
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -132,6 +132,8 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
  * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.
  * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
  *
+  * \implsparsesolverconcept
+  *
  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
  * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
  * and NumTraits<Scalar>::epsilon() for the tolerance.
@@ -148,13 +150,15 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
  * By default the iterations start with x=0 as an initial guess of the solution.
  * One can control the start using the solveWithGuess() method.
  * 
+  * BiCGSTAB can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink.
+  *
  * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
  */
 template< typename _MatrixType, typename _Preconditioner>
 class BiCGSTAB : public IterativeSolverBase<BiCGSTAB<_MatrixType,_Preconditioner> >
 {
  typedef IterativeSolverBase<BiCGSTAB> Base;
-  using Base::mp_matrix;
+  using Base::matrix;
  using Base::m_error;
  using Base::m_iterations;
  using Base::m_info;
@@ -180,7 +184,8 @@ public:
    * this class becomes invalid. Call compute() to update it with the new
    * matrix A, or modify a copy of A.
    */
-  explicit BiCGSTAB(const MatrixType& A) : Base(A) {}
+  template<typename MatrixDerived>
+  explicit BiCGSTAB(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {}

  ~BiCGSTAB() {}

@@ -195,7 +200,7 @@ public:
      m_error = Base::m_tolerance;
      
      typename Dest::ColXpr xj(x,j);
-      if(!internal::bicgstab(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_error))
+      if(!internal::bicgstab(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error))
        failed = true;
    }
    m_info = failed ? NumericalIssue
--- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
+++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
@@ -118,6 +118,8 @@ struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
  *               Default is \c Lower, best performance is \c Lower|Upper.
  * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
  *
+  * \implsparsesolverconcept
+  *
  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
  * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
  * and NumTraits<Scalar>::epsilon() for the tolerance.
@@ -147,13 +149,15 @@ struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
  * By default the iterations start with x=0 as an initial guess of the solution.
  * One can control the start using the solveWithGuess() method.
  * 
+  * ConjugateGradient can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink.
+  *
  * \sa class LeastSquaresConjugateGradient, class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
  */
 template< typename _MatrixType, int _UpLo, typename _Preconditioner>
 class ConjugateGradient : public IterativeSolverBase<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
 {
  typedef IterativeSolverBase<ConjugateGradient> Base;
-  using Base::mp_matrix;
+  using Base::matrix;
  using Base::m_error;
  using Base::m_iterations;
  using Base::m_info;
@@ -183,7 +187,8 @@ public:
    * this class becomes invalid. Call compute() to update it with the new
    * matrix A, or modify a copy of A.
    */
-  explicit ConjugateGradient(const MatrixType& A) : Base(A) {}
+  template<typename MatrixDerived>
+  explicit ConjugateGradient(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {}

  ~ConjugateGradient() {}

@@ -191,12 +196,19 @@ public:
  template<typename Rhs,typename Dest>
  void _solve_with_guess_impl(const Rhs& b, Dest& x) const
  {
-    typedef Ref<const MatrixType> MatRef;
-    typedef typename internal::conditional<UpLo==(Lower|Upper) && (!MatrixType::IsRowMajor) && (!NumTraits<Scalar>::IsComplex),
-                                           Transpose<const MatRef>, MatRef const&>::type RowMajorWrapper;
+    typedef typename Base::MatrixWrapper MatrixWrapper;
+    typedef typename Base::ActualMatrixType ActualMatrixType;
+    enum {
+      TransposeInput  =   (!MatrixWrapper::MatrixFree)
+                      &&  (UpLo==(Lower|Upper))
+                      &&  (!MatrixType::IsRowMajor)
+                      &&  (!NumTraits<Scalar>::IsComplex)
+    };
+    typedef typename internal::conditional<TransposeInput,Transpose<const ActualMatrixType>, ActualMatrixType const&>::type RowMajorWrapper;
+    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY);
    typedef typename internal::conditional<UpLo==(Lower|Upper),
                                           RowMajorWrapper,
-                                           typename MatRef::template ConstSelfAdjointViewReturnType<UpLo>::Type
+                                           typename MatrixWrapper::template ConstSelfAdjointViewReturnType<UpLo>::Type
                                          >::type SelfAdjointWrapper;
    m_iterations = Base::maxIterations();
    m_error = Base::m_tolerance;
@@ -207,7 +219,7 @@ public:
      m_error = Base::m_tolerance;

      typename Dest::ColXpr xj(x,j);
-      RowMajorWrapper row_mat(mp_matrix);
+      RowMajorWrapper row_mat(matrix());
      internal::conjugate_gradient(SelfAdjointWrapper(row_mat), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
    }

--- a/Show More
+++ b/Show More