bump

Fixed a typo.
fix QuaternionBase::cast.
2026-04-10 11:34:33 +08:00 · 2011-12-06 15:53:17 +01:00 · 2011-12-06 15:42:05 +01:00 · 2011-12-05 14:13:59 +01:00 · 2011-12-05 12:50:43 +01:00 · 2011-12-05 10:17:09 +01:00
407 changed files with 28962 additions and 15137 deletions
--- a/.hgeol
+++ b/.hgeol
@@ -1,3 +1,8 @@
-[patterns]
-**.* = native
-eigen_autoexp_part.dat = CRLF
+[patterns]
+scripts/*.in = LF
+debug/msvc/*.dat = CRLF
+unsupported/test/mpreal/*.* = CRLF
+** = native
+
+[repository]
+native = LF
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,6 +103,8 @@ endif()

 add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")

+set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
+
 if(CMAKE_COMPILER_IS_GNUCXX)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
  set(CMAKE_CXX_FLAGS_DEBUG "-g3")
@@ -279,9 +281,21 @@ install(FILES
  )

 if(EIGEN_BUILD_PKGCONFIG)
+    SET(path_separator ":")
+    STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
+    message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
+    FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
+    if(pkg_config_libdir)
+        SET(pkg_config_install_dir ${pkg_config_libdir})
+        message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
+    else(pkg_config_libdir)
+        SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
+        message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
+    endif(pkg_config_libdir)
+
    configure_file(eigen3.pc.in eigen3.pc)
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
-        DESTINATION share/pkgconfig
+        DESTINATION ${pkg_config_install_dir}/pkgconfig
        )
 endif(EIGEN_BUILD_PKGCONFIG)

@@ -289,44 +303,9 @@ add_subdirectory(Eigen)

 add_subdirectory(doc EXCLUDE_FROM_ALL)

-add_custom_target(buildtests)
-add_custom_target(check COMMAND "ctest")
-add_dependencies(check buildtests)
-
-# CMake/Ctest does not allow us to change the build command,
-# so we have to workaround by directly editing the generated DartConfiguration.tcl file
-# save CMAKE_MAKE_PROGRAM
-set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
-# and set a fake one
-set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
-
-include(CTest)
+include(EigenConfigureTesting)
+# fixme, not sure this line is still needed:
 enable_testing() # must be called from the root CMakeLists, see man page
-include(EigenTesting)
-ei_init_testing()
-
-# overwrite default DartConfiguration.tcl
-# The worarounds are different for each version of the MSVC IDE
-if(MSVC_IDE)
-  if(MSVC_VERSION EQUAL 1600) # MSVC 2010
-    set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n # ")
-  else() # MSVC 2008 (TODO check MSVC 2005)
-    set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} /project buildtests")
-  endif()
-else()
-  # for make and nmake
-  set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests")
-endif()
-
-configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
-# restore default CMAKE_MAKE_PROGRAM
-set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
-# un-set temporary variables so that it is like they never existed. 
-# CMake 2.6.3 introduces the more logical unset() syntax for this.
-set(CMAKE_MAKE_PROGRAM_SAVE) 
-set(EIGEN_MAKECOMMAND_PLACEHOLDER)
-
-configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)


 if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
@@ -335,15 +314,13 @@ else()
  add_subdirectory(test EXCLUDE_FROM_ALL)
 endif()

-if(NOT MSVC)
-  if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
-    add_subdirectory(blas)
-    add_subdirectory(lapack)
-  else()
-    add_subdirectory(blas EXCLUDE_FROM_ALL)
-    add_subdirectory(lapack EXCLUDE_FROM_ALL)
-  endif()
-endif(NOT MSVC)
+if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
+  add_subdirectory(blas)
+  add_subdirectory(lapack)
+else()
+  add_subdirectory(blas EXCLUDE_FROM_ALL)
+  add_subdirectory(lapack EXCLUDE_FROM_ALL)
+endif()

 add_subdirectory(unsupported)

--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -0,0 +1,34 @@
+#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
+#define EIGEN_CHOLMODSUPPORT_MODULE_H
+
+#include "SparseCore"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+extern "C" {
+  #include <cholmod.h>
+}
+
+namespace Eigen {
+
+/** \ingroup Support_modules
+  * \defgroup CholmodSupport_Module CholmodSupport module
+  *
+  *
+  * \code
+  * #include <Eigen/CholmodSupport>
+  * \endcode
+  */
+
+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
+#include "src/CholmodSupport/CholmodSupport.h"
+
+
+} // namespace Eigen
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_CHOLMODSUPPORT_MODULE_H
+
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -51,16 +51,16 @@
      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
    #endif
  #endif
-#endif
-
-// Remember that usage of defined() in a #define is undefined by the standard
-#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
-  #define EIGEN_SSE2_BUT_NOT_OLD_GCC
+#else
+  // Remember that usage of defined() in a #define is undefined by the standard
+  #if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
+    #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
+  #endif
 #endif

 #ifndef EIGEN_DONT_VECTORIZE

-  #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+  #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)

    // Defines symbols for compile-time detection of which instructions are
    // used.
@@ -143,6 +143,7 @@
 #ifdef EIGEN_HAS_ERRNO
 #include <cerrno>
 #endif
+#include <cstddef>
 #include <cstdlib>
 #include <cmath>
 #include <complex>
@@ -166,7 +167,7 @@
  #include <intrin.h>
 #endif

-#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
+#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
  #define EIGEN_EXCEPTIONS
 #endif

@@ -174,16 +175,7 @@
  #include <new>
 #endif

-// this needs to be done after all possible windows C header includes and before any Eigen source includes
-// (system C++ includes are supposed to be able to deal with this already):
-// windows.h defines min and max macros which would make Eigen fail to compile.
-#if defined(min) || defined(max)
-#error The preprocessor symbols 'min' or 'max' are defined. If you are compiling on Windows, do #define NOMINMAX to prevent windows.h from defining these symbols.
-#endif
-
-// defined in bits/termios.h
-#undef B0
-
+/** \brief Namespace containing all symbols from the %Eigen library. */
 namespace Eigen {

 inline static const char *SimdInstructionSetsInUse(void) {
@@ -239,6 +231,8 @@ inline static const char *SimdInstructionSetsInUse(void) {
 // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
 // ensure QNX/QCC support
 using std::size_t;
+// gcc 4.6.0 wants std:: for ptrdiff_t 
+using std::ptrdiff_t;

 /** \defgroup Core_Module Core module
  * This is the main module of Eigen providing dense matrix and vector support
@@ -250,6 +244,10 @@ using std::size_t;
  * \endcode
  */

+/** \defgroup Support_modules Support modules [category]
+  * Category of modules which add support for external libraries.
+  */
+
 #include "src/Core/util/Constants.h"
 #include "src/Core/util/ForwardDeclarations.h"
 #include "src/Core/util/Meta.h"
@@ -321,7 +319,7 @@ using std::size_t;
 #include "src/Core/CommaInitializer.h"
 #include "src/Core/Flagged.h"
 #include "src/Core/ProductBase.h"
-#include "src/Core/Product.h"
+#include "src/Core/GeneralProduct.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
 #include "src/Core/SolveTriangular.h"
@@ -350,6 +348,12 @@ using std::size_t;
 #include "src/Core/ArrayBase.h"
 #include "src/Core/ArrayWrapper.h"

+#ifdef EIGEN_ENABLE_EVALUATORS
+#include "src/Core/Product.h"
+#include "src/Core/CoreEvaluators.h"
+#include "src/Core/AssignEvaluator.h"
+#endif 
+
 } // namespace Eigen

 #include "src/Core/GlobalFunctions.h"
--- a/Eigen/Eigen2Support
+++ b/Eigen/Eigen2Support
@@ -33,7 +33,8 @@

 namespace Eigen {

-/** \defgroup Eigen2Support_Module Eigen2 support module
+/** \ingroup Support_modules
+  * \defgroup Eigen2Support_Module Eigen2 support module
  * This module provides a couple of deprecated functions improving the compatibility with Eigen2.
  *
  * To use it, define EIGEN2_SUPPORT before including any Eigen header
@@ -63,6 +64,24 @@ namespace Eigen {
 // Eigen2 used to include iostream
 #include<iostream>

+#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \
+using Eigen::Vector##SizeSuffix##TypeSuffix; \
+using Eigen::RowVector##SizeSuffix##TypeSuffix;
+
+#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
+
+#define EIGEN_USING_MATRIX_TYPEDEFS \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
+
 #define USING_PART_OF_NAMESPACE_EIGEN \
 EIGEN_USING_MATRIX_TYPEDEFS \
 using Eigen::Matrix; \
--- a/Eigen/Eigenvalues
+++ b/Eigen/Eigenvalues
@@ -9,6 +9,7 @@
 #include "Jacobi"
 #include "Householder"
 #include "LU"
+#include "Geometry"

 namespace Eigen {

--- a/Eigen/IterativeLinearSolvers
+++ b/Eigen/IterativeLinearSolvers
@@ -0,0 +1,37 @@
+#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
+#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
+
+#include "SparseCore"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+namespace Eigen {
+
+/** \ingroup Sparse_modules
+  * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module
+  *
+  * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse.
+  * Those solvers are accessible via the following classes:
+  *  - ConjugateGradient for selfadjoint (hermitian) matrices,
+  *  - BiCGSTAB for general square matrices.
+  *
+  * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
+  *
+  * \code
+  * #include <Eigen/IterativeLinearSolvers>
+  * \endcode
+  */
+
+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
+#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
+#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
+#include "src/IterativeLinearSolvers/ConjugateGradient.h"
+#include "src/IterativeLinearSolvers/BiCGSTAB.h"
+
+} // namespace Eigen
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
--- a/Eigen/OrderingMethods
+++ b/Eigen/OrderingMethods
@@ -0,0 +1,27 @@
+#ifndef EIGEN_ORDERINGMETHODS_MODULE_H
+#define EIGEN_ORDERINGMETHODS_MODULE_H
+
+#include "SparseCore"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+namespace Eigen {
+
+/** \ingroup Sparse_modules
+  * \defgroup OrderingMethods_Module OrderingMethods module
+  *
+  * This module is currently for internal use only.
+  *
+  *
+  * \code
+  * #include <Eigen/OrderingMethods>
+  * \endcode
+  */
+
+#include "src/OrderingMethods/Amd.h"
+
+} // namespace Eigen
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_ORDERINGMETHODS_MODULE_H
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -1,69 +1,27 @@
 #ifndef EIGEN_SPARSE_MODULE_H
 #define EIGEN_SPARSE_MODULE_H

-#include "Core"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-#include <vector>
-#include <map>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-
-#ifdef EIGEN2_SUPPORT
-#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
-#endif
-
-#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
-#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
-#endif
-
 namespace Eigen {

-/** \defgroup Sparse_Module Sparse module
+/** \defgroup Sparse_modules Sparse modules
  *
-  *
-  *
-  * See the \ref TutorialSparse "Sparse tutorial"
+  * Meta-module including all related modules:
+  * - SparseCore
+  * - OrderingMethods
+  * - SparseCholesky
+  * - IterativeLinearSolvers
  *
  * \code
  * #include <Eigen/Sparse>
  * \endcode
  */

-/** The type used to identify a general sparse storage. */
-struct Sparse {};
-
-#include "src/Sparse/SparseUtil.h"
-#include "src/Sparse/SparseMatrixBase.h"
-#include "src/Sparse/CompressedStorage.h"
-#include "src/Sparse/AmbiVector.h"
-#include "src/Sparse/SparseMatrix.h"
-#include "src/Sparse/DynamicSparseMatrix.h"
-#include "src/Sparse/MappedSparseMatrix.h"
-#include "src/Sparse/SparseVector.h"
-#include "src/Sparse/CoreIterators.h"
-#include "src/Sparse/SparseBlock.h"
-#include "src/Sparse/SparseTranspose.h"
-#include "src/Sparse/SparseCwiseUnaryOp.h"
-#include "src/Sparse/SparseCwiseBinaryOp.h"
-#include "src/Sparse/SparseDot.h"
-#include "src/Sparse/SparseAssign.h"
-#include "src/Sparse/SparseRedux.h"
-#include "src/Sparse/SparseFuzzy.h"
-#include "src/Sparse/SparseProduct.h"
-#include "src/Sparse/SparseSparseProduct.h"
-#include "src/Sparse/SparseDenseProduct.h"
-#include "src/Sparse/SparseDiagonalProduct.h"
-#include "src/Sparse/SparseTriangularView.h"
-#include "src/Sparse/SparseSelfAdjointView.h"
-#include "src/Sparse/TriangularSolver.h"
-#include "src/Sparse/SparseView.h"
-
 } // namespace Eigen

-#include "src/Core/util/ReenableStupidWarnings.h"
+#include "SparseCore"
+#include "OrderingMethods"
+#include "SparseCholesky"
+#include "IterativeLinearSolvers"

 #endif // EIGEN_SPARSE_MODULE_H

--- a/Eigen/SparseCholesky
+++ b/Eigen/SparseCholesky
@@ -0,0 +1,34 @@
+#ifndef EIGEN_SPARSECHOLESKY_MODULE_H
+#define EIGEN_SPARSECHOLESKY_MODULE_H
+
+#include "SparseCore"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+namespace Eigen {
+
+/** \ingroup Sparse_modules
+  * \defgroup SparseCholesky_Module SparseCholesky module
+  *
+  * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.
+  * Those decompositions are accessible via the following classes:
+  *  - SimplicialLLt,
+  *  - SimplicialLDLt
+  *
+  * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module.
+  *
+  * \code
+  * #include <Eigen/SparseCholesky>
+  * \endcode
+  */
+
+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
+#include "src/SparseCholesky/SimplicialCholesky.h"
+
+} // namespace Eigen
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_SPARSECHOLESKY_MODULE_H
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -0,0 +1,65 @@
+#ifndef EIGEN_SPARSECORE_MODULE_H
+#define EIGEN_SPARSECORE_MODULE_H
+
+#include "Core"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+#include <vector>
+#include <map>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+
+namespace Eigen {
+
+/** \ingroup Sparse_modules
+  * \defgroup SparseCore_Module SparseCore module
+  *
+  * This module provides a sparse matrix representation, and basic associatd matrix manipulations
+  * and operations.
+  *
+  * See the \ref TutorialSparse "Sparse tutorial"
+  *
+  * \code
+  * #include <Eigen/SparseCore>
+  * \endcode
+  *
+  * This module depends on: Core.
+  */
+
+/** The type used to identify a general sparse storage. */
+struct Sparse {};
+
+#include "src/SparseCore/SparseUtil.h"
+#include "src/SparseCore/SparseMatrixBase.h"
+#include "src/SparseCore/CompressedStorage.h"
+#include "src/SparseCore/AmbiVector.h"
+#include "src/SparseCore/SparseMatrix.h"
+#include "src/SparseCore/MappedSparseMatrix.h"
+#include "src/SparseCore/SparseVector.h"
+#include "src/SparseCore/CoreIterators.h"
+#include "src/SparseCore/SparseBlock.h"
+#include "src/SparseCore/SparseTranspose.h"
+#include "src/SparseCore/SparseCwiseUnaryOp.h"
+#include "src/SparseCore/SparseCwiseBinaryOp.h"
+#include "src/SparseCore/SparseDot.h"
+#include "src/SparseCore/SparseAssign.h"
+#include "src/SparseCore/SparseRedux.h"
+#include "src/SparseCore/SparseFuzzy.h"
+#include "src/SparseCore/ConservativeSparseSparseProduct.h"
+#include "src/SparseCore/SparseSparseProductWithPruning.h"
+#include "src/SparseCore/SparseProduct.h"
+#include "src/SparseCore/SparseDenseProduct.h"
+#include "src/SparseCore/SparseDiagonalProduct.h"
+#include "src/SparseCore/SparseTriangularView.h"
+#include "src/SparseCore/SparseSelfAdjointView.h"
+#include "src/SparseCore/TriangularSolver.h"
+#include "src/SparseCore/SparseView.h"
+
+} // namespace Eigen
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_SPARSECORE_MODULE_H
+
--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -0,0 +1,53 @@
+#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
+#define EIGEN_SUPERLUSUPPORT_MODULE_H
+
+#include "SparseCore"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+#ifdef EMPTY
+#define EIGEN_EMPTY_WAS_ALREADY_DEFINED
+#endif
+
+typedef int int_t;
+#include <slu_Cnames.h>
+#include <supermatrix.h>
+#include <slu_util.h>
+
+// slu_util.h defines a preprocessor token named EMPTY which is really polluting,
+// so we remove it in favor of a SUPERLU_EMPTY token.
+// If EMPTY was already, defined then we don't undef it.
+
+#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED)
+# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED
+#elif defined(EMPTY)
+# undef EMPTY
+#endif
+
+#define SUPERLU_EMPTY (-1)
+
+namespace Eigen { struct SluMatrix; }
+
+namespace Eigen {
+
+/** \ingroup Support_modules
+  * \defgroup SuperLUSupport_Module SuperLUSupport module
+  *
+  * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
+  *
+  * \code
+  * #include <Eigen/SuperLUSupport>
+  * \endcode
+  */
+
+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
+#include "src/SuperLUSupport/SuperLUSupport.h"
+
+
+} // namespace Eigen
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_SUPERLUSUPPORT_MODULE_H
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -0,0 +1,34 @@
+#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
+#define EIGEN_UMFPACKSUPPORT_MODULE_H
+
+#include "SparseCore"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+extern "C" {
+#include <umfpack.h>
+}
+
+namespace Eigen {
+
+/** \ingroup Support_modules
+  * \defgroup UmfPackSupport_Module UmfPackSupport module
+  *
+  *
+  *
+  *
+  * \code
+  * #include <Eigen/UmfPackSupport>
+  * \endcode
+  */
+
+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
+#include "src/UmfPackSupport/UmfPackSupport.h"
+
+} // namespace Eigen
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_UMFPACKSUPPORT_MODULE_H
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -31,7 +31,7 @@ namespace internal {
 template<typename MatrixType, int UpLo> struct LDLT_Traits;
 }

-/** \ingroup cholesky_Module
+/** \ingroup Cholesky_Module
  *
  * \class LDLT
  *
@@ -158,10 +158,19 @@ template<typename _MatrixType, int _UpLo> class LDLT
    }

    /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
+      *
+      * This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .
      *
      * \note_about_checking_solutions
      *
-      * \sa solveInPlace(), MatrixBase::ldlt()
+      * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
+      * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, 
+      * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
+      * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
+      * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
+      * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
+      *
+      * \sa MatrixBase::ldlt()
      */
    template<typename Rhs>
    inline const internal::solve_retval<LDLT, Rhs>
@@ -376,7 +385,21 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
    dec().matrixL().solveInPlace(dst);

    // dst = D^-1 (L^-1 P b)
-    dst = dec().vectorD().asDiagonal().inverse() * dst;
+    // more precisely, use pseudo-inverse of D (see bug 241)
+    using std::abs;
+    using std::max;
+    typedef typename LDLTType::MatrixType MatrixType;
+    typedef typename LDLTType::Scalar Scalar;
+    typedef typename LDLTType::RealScalar RealScalar;
+    const Diagonal<const MatrixType> vectorD = dec().vectorD();
+    RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
+				 RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
+    for (Index i = 0; i < vectorD.size(); ++i) {
+      if(abs(vectorD(i)) > tolerance)
+	dst.row(i) /= vectorD(i);
+      else
+	dst.row(i).setZero();
+    }

    // dst = L^-T (D^-1 L^-1 P b)
    dec().matrixU().solveInPlace(dst);
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -29,7 +29,7 @@ namespace internal{
 template<typename MatrixType, int UpLo> struct LLT_Traits;
 }

-/** \ingroup cholesky_Module
+/** \ingroup Cholesky_Module
  *
  * \class LLT
  *
@@ -49,6 +49,9 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
  * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
  * has a solution.
  *
+  * Example: \include LLT_example.cpp
+  * Output: \verbinclude LLT_example.out
+  *    
  * \sa MatrixBase::llt(), class LDLT
  */
 /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
@@ -178,6 +181,9 @@ template<typename _MatrixType, int _UpLo> class LLT
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }

+    template<typename VectorType>
+    void rankUpdate(const VectorType& vec);
+
  protected:
    /** \internal
      * Used to compute and store L
@@ -233,7 +239,7 @@ template<> struct llt_inplace<Lower>

    Index blockSize = size/8;
    blockSize = (blockSize/16)*16;
-    blockSize = std::min(std::max(blockSize,Index(8)), Index(128));
+    blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));

    for (Index k=0; k<size; k+=blockSize)
    {
@@ -241,7 +247,7 @@ template<> struct llt_inplace<Lower>
      //       A00 |  -  |  -
      // lu  = A10 | A11 |  -
      //       A20 | A21 | A22
-      Index bs = std::min(blockSize, size-k);
+      Index bs = (std::min)(blockSize, size-k);
      Index rs = size - k - bs;
      Block<MatrixType,Dynamic,Dynamic> A11(m,k,   k,   bs,bs);
      Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k,   rs,bs);
@@ -254,6 +260,35 @@ template<> struct llt_inplace<Lower>
    }
    return -1;
  }
+
+  template<typename MatrixType, typename VectorType>
+  static void rankUpdate(MatrixType& mat, const VectorType& vec)
+  {
+    typedef typename MatrixType::ColXpr ColXpr;
+    typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
+    typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
+    typedef typename MatrixType::Scalar Scalar;
+    typedef Matrix<Scalar,Dynamic,1> TempVectorType;
+    typedef typename TempVectorType::SegmentReturnType TempVecSegment;
+
+    int n = mat.cols();
+    eigen_assert(mat.rows()==n && vec.size()==n);
+    TempVectorType temp(vec);
+
+    for(int i=0; i<n; ++i)
+    {
+      JacobiRotation<Scalar> g;
+      g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
+
+      int rs = n-i-1;
+      if(rs>0)
+      {
+        ColXprSegment x(mat.col(i).tail(rs));
+        TempVecSegment y(temp.tail(rs));
+        apply_rotation_in_the_plane(x, y, g);
+      }
+    }
+  }
 };

 template<> struct llt_inplace<Upper>
@@ -270,6 +305,12 @@ template<> struct llt_inplace<Upper>
    Transpose<MatrixType> matt(mat);
    return llt_inplace<Lower>::blocked(matt);
  }
+  template<typename MatrixType, typename VectorType>
+  static void rankUpdate(MatrixType& mat, const VectorType& vec)
+  {
+    Transpose<MatrixType> matt(mat);
+    return llt_inplace<Lower>::rankUpdate(matt, vec.conjugate());
+  }
 };

 template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
@@ -295,9 +336,11 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
 } // end namespace internal

 /** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
-  *
  *
  * \returns a reference to *this
+  *
+  * Example: \include TutorialLinAlgComputeTwice.cpp
+  * Output: \verbinclude TutorialLinAlgComputeTwice.out
  */
 template<typename MatrixType, int _UpLo>
 LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
@@ -314,6 +357,20 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  return *this;
 }

+/** Performs a rank one update of the current decomposition.
+  * If A = LL^* before the rank one update,
+  * then after it we have LL^* = A + vv^* where \a v must be a vector
+  * of same dimension.
+  *
+  */
+template<typename MatrixType, int _UpLo>
+template<typename VectorType>
+void LLT<MatrixType,_UpLo>::rankUpdate(const VectorType& v)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
+  internal::llt_inplace<UpLo>::rankUpdate(m_matrix,v);
+}
+    
 namespace internal {
 template<typename _MatrixType, int UpLo, typename Rhs>
 struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
@@ -384,3 +441,4 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
 }

 #endif // EIGEN_LLT_H
+
--- a/Eigen/src/CholmodSupport/CMakeLists.txt
+++ b/Eigen/src/CholmodSupport/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
+
+INSTALL(FILES 
+  ${Eigen_CholmodSupport_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
+  )
--- a/unsupported/Eigen/src/SparseExtra/CholmodSupport.h
+++ b/unsupported/Eigen/src/SparseExtra/CholmodSupport.h
@@ -69,9 +69,9 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
  res.nzmax   = mat.nonZeros();
  res.nrow    = mat.rows();;
  res.ncol    = mat.cols();
-  res.p       = mat._outerIndexPtr();
-  res.i       = mat._innerIndexPtr();
-  res.x       = mat._valuePtr();
+  res.p       = mat.outerIndexPtr();
+  res.i       = mat.innerIndexPtr();
+  res.x       = mat.valuePtr();
  res.sorted  = 1;
  res.packed  = 1;
  res.dtype   = 0;
@@ -149,7 +149,9 @@ enum CholmodMode {
  CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt
 };

-/** \brief A Cholesky factorization and solver based on Cholmod
+/** \ingroup CholmodSupport_Module
+  * \class CholmodDecomposition
+  * \brief A Cholesky factorization and solver based on Cholmod
  *
  * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization
  * using the Cholmod library. The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
@@ -159,6 +161,7 @@ enum CholmodMode {
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
  *               or Upper. Default is Lower.
  *
+  * \sa \ref TutorialSparseDirectSolvers
  */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodDecomposition
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -68,10 +68,8 @@ class Array
    friend struct internal::conservative_resize_like_impl;

    using Base::m_storage;
+
  public:
-    enum { NeedsToAlign = (!(Options&DontAlign))
-                          && SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
-    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

    using Base::base;
    using Base::coeff;
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -53,6 +53,12 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)

+    typedef typename internal::conditional<
+                       internal::is_lvalue<ExpressionType>::value,
+                       Scalar,
+                       const Scalar
+                     >::type ScalarWithConstIfNotLvalue;
+
    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;

    inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -62,6 +68,9 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    inline Index outerStride() const { return m_expression.outerStride(); }
    inline Index innerStride() const { return m_expression.innerStride(); }

+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+    inline const Scalar* data() const { return m_expression.data(); }
+
    inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
@@ -119,6 +128,12 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    template<typename Dest>
    inline void evalTo(Dest& dst) const { dst = m_expression; }

+    const typename internal::remove_all<NestedExpressionType>::type& 
+    nestedExpression() const 
+    {
+      return m_expression;
+    }
+
  protected:
    const NestedExpressionType m_expression;
 };
@@ -151,6 +166,12 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)

+    typedef typename internal::conditional<
+                       internal::is_lvalue<ExpressionType>::value,
+                       Scalar,
+                       const Scalar
+                     >::type ScalarWithConstIfNotLvalue;
+
    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;

    inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}
@@ -160,6 +181,9 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    inline Index outerStride() const { return m_expression.outerStride(); }
    inline Index innerStride() const { return m_expression.innerStride(); }

+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+    inline const Scalar* data() const { return m_expression.data(); }
+
    inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
@@ -214,6 +238,12 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
    }

+    const typename internal::remove_all<NestedExpressionType>::type& 
+    nestedExpression() const 
+    {
+      return m_expression;
+    }
+
  protected:
    const NestedExpressionType m_expression;
 };
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -251,21 +251,22 @@ struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>

 template<typename Derived1, typename Derived2,
         int Traversal = assign_traits<Derived1, Derived2>::Traversal,
-         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
+         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
+         int Version = Specialized>
 struct assign_impl;

 /************************
 *** Default traversal ***
 ************************/

-template<typename Derived1, typename Derived2, int Unrolling>
-struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
+template<typename Derived1, typename Derived2, int Unrolling, int Version>
+struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
 {
  inline static void run(Derived1 &, const Derived2 &) { }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  inline static void run(Derived1 &dst, const Derived2 &src)
@@ -278,8 +279,8 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
  }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
 {
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
@@ -288,8 +289,8 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
  }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
@@ -305,8 +306,8 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
 *** Linear traversal ***
 ***********************/

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  inline static void run(Derived1 &dst, const Derived2 &src)
@@ -317,8 +318,8 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
  }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
 {
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
@@ -331,8 +332,8 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
 *** Inner vectorization ***
 **************************/

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  inline static void run(Derived1 &dst, const Derived2 &src)
@@ -346,8 +347,8 @@ struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
  }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
 {
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
@@ -356,8 +357,8 @@ struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolli
  }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
@@ -398,8 +399,8 @@ struct unaligned_assign_impl<false>
  }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
@@ -426,8 +427,8 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
  }
 };

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
@@ -445,8 +446,8 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnroll
 *** Slice vectorization ***
 ***************************/

-template<typename Derived1, typename Derived2>
-struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
 {
  typedef typename Derived1::Index Index;
  inline static void run(Derived1 &dst, const Derived2 &src)
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -0,0 +1,682 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_ASSIGN_EVALUATOR_H
+#define EIGEN_ASSIGN_EVALUATOR_H
+
+// This implementation is based on Assign.h
+
+namespace internal {
+  
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for traversal and unrolling       *
+***************************************************************************/
+
+// copy_using_evaluator_traits is based on assign_traits
+// (actually, it's identical)
+
+template <typename Derived, typename OtherDerived>
+struct copy_using_evaluator_traits
+{
+public:
+  enum {
+    DstIsAligned = Derived::Flags & AlignedBit,
+    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
+    SrcIsAligned = OtherDerived::Flags & AlignedBit,
+    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
+  };
+
+private:
+  enum {
+    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
+              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
+              : int(Derived::RowsAtCompileTime),
+    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
+              : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
+              : int(Derived::MaxRowsAtCompileTime),
+    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
+    PacketSize = packet_traits<typename Derived::Scalar>::size
+  };
+
+  enum {
+    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
+    MightVectorize = StorageOrdersAgree
+                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
+    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+                       && int(DstIsAligned) && int(SrcIsAligned),
+    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
+                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+         so it's only good for large enough sizes. */
+    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
+                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
+      /* slice vectorization can be slow, so we only want it if the slices are big, which is
+         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+         in a fixed-size matrix */
+  };
+
+public:
+  enum {
+    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
+              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
+              : int(MayLinearize)       ? int(LinearTraversal)
+                                        : int(DefaultTraversal),
+    Vectorized = int(Traversal) == InnerVectorizedTraversal
+              || int(Traversal) == LinearVectorizedTraversal
+              || int(Traversal) == SliceVectorizedTraversal
+  };
+
+private:
+  enum {
+    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
+    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
+                       && int(OtherDerived::CoeffReadCost) != Dynamic
+                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
+    MayUnrollInner      = int(InnerSize) != Dynamic
+                       && int(OtherDerived::CoeffReadCost) != Dynamic
+                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
+  };
+
+public:
+  enum {
+    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+                ? (
+ 		    int(MayUnrollCompletely) ? int(CompleteUnrolling)
+                  : int(MayUnrollInner)      ? int(InnerUnrolling)
+                                             : int(NoUnrolling)
+                  )
+              : int(Traversal) == int(LinearVectorizedTraversal)
+                ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) 
+                                                                    : int(NoUnrolling) )
+              : int(Traversal) == int(LinearTraversal)
+                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
+                                              : int(NoUnrolling) )
+              : int(NoUnrolling)
+  };
+
+#ifdef EIGEN_DEBUG_ASSIGN
+  static void debug()
+  {
+    EIGEN_DEBUG_VAR(DstIsAligned)
+    EIGEN_DEBUG_VAR(SrcIsAligned)
+    EIGEN_DEBUG_VAR(JointAlignment)
+    EIGEN_DEBUG_VAR(InnerSize)
+    EIGEN_DEBUG_VAR(InnerMaxSize)
+    EIGEN_DEBUG_VAR(PacketSize)
+    EIGEN_DEBUG_VAR(StorageOrdersAgree)
+    EIGEN_DEBUG_VAR(MightVectorize)
+    EIGEN_DEBUG_VAR(MayLinearize)
+    EIGEN_DEBUG_VAR(MayInnerVectorize)
+    EIGEN_DEBUG_VAR(MayLinearVectorize)
+    EIGEN_DEBUG_VAR(MaySliceVectorize)
+    EIGEN_DEBUG_VAR(Traversal)
+    EIGEN_DEBUG_VAR(UnrollingLimit)
+    EIGEN_DEBUG_VAR(MayUnrollCompletely)
+    EIGEN_DEBUG_VAR(MayUnrollInner)
+    EIGEN_DEBUG_VAR(Unrolling)
+  }
+#endif
+};
+
+/***************************************************************************
+* Part 2 : meta-unrollers
+***************************************************************************/
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+{
+  typedef typename DstEvaluatorType::XprType DstXprType;
+  
+  enum {
+    outer = Index / DstXprType::InnerSizeAtCompileTime,
+    inner = Index % DstXprType::InnerSizeAtCompileTime
+  };
+
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, 
+				      SrcEvaluatorType &srcEvaluator)
+  {
+    dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, 
+				      SrcEvaluatorType &srcEvaluator, 
+				      int outer)
+  {
+    dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
+    copy_using_evaluator_DefaultTraversal_InnerUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
+      ::run(dstEvaluator, srcEvaluator, outer);
+  }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, 
+				      SrcEvaluatorType &srcEvaluator)
+  {
+    dstEvaluator.copyCoeff(Index, srcEvaluator);
+    copy_using_evaluator_LinearTraversal_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, Index+1, Stop>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling
+{
+  typedef typename DstEvaluatorType::XprType DstXprType;
+  typedef typename SrcEvaluatorType::XprType SrcXprType;
+
+  enum {
+    outer = Index / DstXprType::InnerSizeAtCompileTime,
+    inner = Index % DstXprType::InnerSizeAtCompileTime,
+    JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
+  };
+
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, 
+				      SrcEvaluatorType &srcEvaluator)
+  {
+    dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
+    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
+    copy_using_evaluator_innervec_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+struct copy_using_evaluator_innervec_InnerUnrolling
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, 
+				      SrcEvaluatorType &srcEvaluator, 
+				      int outer)
+  {
+    dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
+    typedef typename DstEvaluatorType::XprType DstXprType;
+    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
+    copy_using_evaluator_innervec_InnerUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
+      ::run(dstEvaluator, srcEvaluator, outer);
+  }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
+struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+// copy_using_evaluator_impl is based on assign_impl
+
+template<typename DstXprType, typename SrcXprType,
+         int Traversal = copy_using_evaluator_traits<DstXprType, SrcXprType>::Traversal,
+         int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling>
+struct copy_using_evaluator_impl;
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling>
+{
+  static void run(DstXprType& dst, const SrcXprType& src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    for(Index outer = 0; outer < dst.outerSize(); ++outer) {
+      for(Index inner = 0; inner < dst.innerSize(); ++inner) {
+	dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+      }
+    }
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
+{
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
+{
+  typedef typename DstXprType::Index Index;
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      copy_using_evaluator_DefaultTraversal_InnerUnrolling
+	<DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+        ::run(dstEvaluator, srcEvaluator, outer);
+  }
+};
+
+/***************************
+*** Linear vectorization ***
+***************************/
+
+template <bool IsAligned = false>
+struct unaligned_copy_using_evaluator_impl
+{
+  // if IsAligned = true, then do nothing
+  template <typename SrcEvaluatorType, typename DstEvaluatorType>
+  static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, 
+				      typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
+};
+
+template <>
+struct unaligned_copy_using_evaluator_impl<false>
+{
+  // MSVC must not inline this functions. If it does, it fails to optimize the
+  // packet access path.
+#ifdef _MSC_VER
+  template <typename DstEvaluatorType, typename SrcEvaluatorType>
+  static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, 
+				    const SrcEvaluatorType &srcEvaluator, 
+				    typename DstEvaluatorType::Index start, 
+				    typename DstEvaluatorType::Index end)
+#else
+  template <typename DstEvaluatorType, typename SrcEvaluatorType>
+  static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, 
+				      const SrcEvaluatorType &srcEvaluator, 
+				      typename DstEvaluatorType::Index start, 
+				      typename DstEvaluatorType::Index end)
+#endif
+  {
+    for (typename DstEvaluatorType::Index index = start; index < end; ++index)
+      dstEvaluator.copyCoeff(index, srcEvaluator);
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
+{
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    const Index size = dst.size();
+    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      dstIsAligned = int(copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned),
+      dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
+      srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
+    };
+    const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dstEvaluator.coeffRef(0), size);
+    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+
+    unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);
+
+    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+    {
+      dstEvaluator.template copyPacket<dstAlignment, srcAlignment>(index, srcEvaluator);
+    }
+
+    unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size);
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
+{
+  typedef typename DstXprType::Index Index;
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    enum { size = DstXprType::SizeAtCompileTime,
+           packetSize = packet_traits<typename DstXprType::Scalar>::size,
+           alignedSize = (size/packetSize)*packetSize };
+
+    copy_using_evaluator_innervec_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, 0, alignedSize>
+      ::run(dstEvaluator, srcEvaluator);
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, alignedSize, size>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, NoUnrolling>
+{
+  inline static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    const Index packetSize = packet_traits<typename DstXprType::Scalar>::size;
+    for(Index outer = 0; outer < outerSize; ++outer)
+      for(Index inner = 0; inner < innerSize; inner+=packetSize) {
+	dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, inner, srcEvaluator);
+      }
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
+{
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    copy_using_evaluator_innervec_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
+{
+  typedef typename DstXprType::Index Index;
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      copy_using_evaluator_innervec_InnerUnrolling
+	<DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+        ::run(dstEvaluator, srcEvaluator, outer);
+  }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnrolling>
+{
+  inline static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    const Index size = dst.size();
+    for(Index i = 0; i < size; ++i)
+      dstEvaluator.copyCoeff(i, srcEvaluator);
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
+{
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    copy_using_evaluator_LinearTraversal_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling>
+{
+  inline static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      alignable = PacketTraits::AlignedOnScalar,
+      dstAlignment = alignable ? Aligned : int(copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ,
+      srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
+    };
+    const Index packetAlignedMask = packetSize - 1;
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
+    Index alignedStart = ((!alignable) || copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
+                       : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+
+    for(Index outer = 0; outer < outerSize; ++outer)
+    {
+      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      // do the non-vectorizable part of the assignment
+      for(Index inner = 0; inner<alignedStart ; ++inner) {
+        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+      }
+
+      // do the vectorizable part of the assignment
+      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) {
+        dstEvaluator.template copyPacketByOuterInner<dstAlignment, srcAlignment>(outer, inner, srcEvaluator);
+      }
+
+      // do the non-vectorizable part of the assignment
+      for(Index inner = alignedEnd; inner<innerSize ; ++inner) {
+        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+      }
+
+      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+    }
+  }
+};
+
+/***************************************************************************
+* Part 4 : Entry points
+***************************************************************************/
+
+// Based on DenseBase::LazyAssign()
+
+template<typename DstXprType, typename SrcXprType>
+const DstXprType& copy_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+{
+#ifdef EIGEN_DEBUG_ASSIGN
+  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
+#endif
+  copy_using_evaluator_impl<DstXprType, SrcXprType>::run(const_cast<DstXprType&>(dst), src);
+  return dst;
+}
+
+// Based on DenseBase::swap()
+// TODO: Chech whether we need to do something special for swapping two
+//       Arrays or Matrices.
+
+template<typename DstXprType, typename SrcXprType>
+void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+{
+  copy_using_evaluator(SwapWrapper<DstXprType>(const_cast<DstXprType&>(dst)), src);
+}
+
+// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
+template<typename DstXprType, typename SrcXprType>
+void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+// Based on ArrayBase::operator+=
+template<typename DstXprType, typename SrcXprType>
+void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+// TODO: Add add_assign_using_evaluator for EigenBase ?
+
+template<typename DstXprType, typename SrcXprType>
+void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+
+} // namespace internal
+
+#endif // EIGEN_ASSIGN_EVALUATOR_H
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -87,7 +87,7 @@ class BandMatrixBase : public EigenBase<Derived>
      if (i<=supers())
      {
        start = supers()-i;
-        len = std::min(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
+        len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
      }
      else if (i>=rows()-subs())
        len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
@@ -96,11 +96,11 @@ class BandMatrixBase : public EigenBase<Derived>

    /** \returns a vector expression of the main diagonal */
    inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
-    { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
+    { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }

    /** \returns a vector expression of the main diagonal (const version) */
    inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
-    { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }
+    { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }

    template<int Index> struct DiagonalIntReturnType {
      enum {
@@ -122,13 +122,13 @@ class BandMatrixBase : public EigenBase<Derived>
    /** \returns a vector expression of the \a N -th sub or super diagonal */
    template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
    {
-      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
+      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
    }

    /** \returns a vector expression of the \a N -th sub or super diagonal */
    template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
    {
-      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
+      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
    }

    /** \returns a vector expression of the \a i -th sub or super diagonal */
@@ -166,7 +166,7 @@ class BandMatrixBase : public EigenBase<Derived>
  protected:

    inline Index diagonalLength(Index i) const
-    { return i<0 ? std::min(cols(),rows()+i) : std::min(rows(),cols()-i); }
+    { return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
 };

 /**
@@ -180,7 +180,7 @@ class BandMatrixBase : public EigenBase<Derived>
  * \param Cols Number of columns, or \b Dynamic
  * \param Supers Number of super diagonal
  * \param Subs Number of sub diagonal
-  * \param _Options A combination of either \b RowMajor or \b ColMajor, and of \b SelfAdjoint
+  * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to
  *                 column-major. The latter controls whether the matrix represents a selfadjoint 
  *                 matrix in which case either Supers of Subs have to be null.
@@ -284,6 +284,7 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
      : m_coeffs(coeffs),
        m_rows(rows), m_supers(supers), m_subs(subs)
    {
+      EIGEN_UNUSED_VARIABLE(cols);
      //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
    }

--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -94,7 +94,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess>
    MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
                       && (InnerStrideAtCompileTime == 1)
                        ? PacketAccessBit : 0,
-    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
+    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * sizeof(Scalar)) % 16) == 0)) ? AlignedBit : 0,
    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
@@ -242,6 +242,21 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
    inline Index outerStride() const;
    #endif

+    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
+    { 
+      return m_xpr; 
+    }
+      
+    Index startRow() const 
+    { 
+      return m_startRow.value(); 
+    }
+      
+    Index startCol() const 
+    { 
+      return m_startCol.value(); 
+    }
+
  protected:

    const typename XprType::Nested m_xpr;
@@ -304,6 +319,11 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
      init();
    }

+    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
+    { 
+      return m_xpr; 
+    }
+      
    /** \sa MapBase::innerStride() */
    inline Index innerStride() const
    {
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -101,6 +101,9 @@ class CwiseNullaryOp : internal::no_assignment_operator,
      return m_functor.packetOp(index);
    }

+    /** \returns the functor representing the nullary operation */
+    const NullaryOp& functor() const { return m_functor; }
+
  protected:
    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
@@ -742,7 +745,7 @@ struct setIdentity_impl<Derived, true>
  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    m.setZero();
-    const Index size = std::min(m.rows(), m.cols());
+    const Index size = (std::min)(m.rows(), m.cols());
    for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
    return m;
  }
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -35,7 +35,7 @@ template<typename T> struct add_const_on_value_type_if_arithmetic
 /** \brief Base class providing read-only coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam ReadOnlyAccessors Constant indicating read-only access
+  * \tparam #ReadOnlyAccessors Constant indicating read-only access
  *
  * This class defines the \c operator() \c const function and friends, which can be used to read specific
  * entries of a matrix or array.
@@ -212,7 +212,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit.
      *
-      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
+      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
@@ -239,7 +239,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit and the LinearAccessBit.
      *
-      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
+      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
@@ -275,7 +275,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
 /** \brief Base class providing read/write coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam WriteAccessors Constant indicating read/write access
+  * \tparam #WriteAccessors Constant indicating read/write access
  *
  * This class defines the non-const \c operator() function and friends, which can be used to write specific
  * entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
@@ -433,7 +433,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit.
      *
-      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
+      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
@@ -567,7 +567,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
 /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam DirectAccessors Constant indicating direct access
+  * \tparam #DirectAccessors Constant indicating direct access
  *
  * This class defines functions to work with strides which can be used to access entries directly. This class
  * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
@@ -637,7 +637,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
 /** \brief Base class providing direct read/write coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam DirectAccessors Constant indicating direct access
+  * \tparam #DirectWriteAccessors Constant indicating direct access
  *
  * This class defines functions to work with strides which can be used to access entries directly. This class
  * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -58,7 +58,7 @@ struct plain_array
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
    eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
              && "this assertion is explained here: " \
-              "http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
+              "http://eigen.tuxfamily.org/dox-devel/TopicUnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #endif

@@ -128,6 +128,16 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
    inline T *data() { return 0; }
 };

+// more specializations for null matrices; these are necessary to resolve ambiguities
+template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
 // dynamic-size matrix with fixed-size storage
 template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
 {
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -87,7 +87,7 @@ template<typename MatrixType, int DiagIndex> class Diagonal
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

    inline Index rows() const
-    { return m_index.value()<0 ? std::min(m_matrix.cols(),m_matrix.rows()+m_index.value()) : std::min(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
+    { return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }

    inline Index cols() const { return 1; }

@@ -133,6 +133,17 @@ template<typename MatrixType, int DiagIndex> class Diagonal
      return m_matrix.coeff(index+rowOffset(), index+colOffset());
    }

+    const typename internal::remove_all<typename MatrixType::Nested>::type& 
+    nestedExpression() const 
+    {
+      return m_matrix;
+    }
+
+    int index() const
+    {
+      return m_index.value();
+    }
+
  protected:
    const typename MatrixType::Nested m_matrix;
    const internal::variable_if_dynamic<Index, DiagIndex> m_index;
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -116,7 +116,9 @@ MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const

 //---------- implementation of L2 norm and related functions ----------

-/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
+/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
+  * In both cases, it consists in the sum of the square of all the matrix entries.
+  * For vectors, this is also equals to the dot product of \c *this with itself.
  *
  * \sa dot(), norm()
  */
@@ -126,7 +128,9 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
  return internal::real((*this).cwiseAbs2().sum());
 }

-/** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself.
+/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
+  * In both cases, it consists in the square root of the sum of the square of all the matrix entries.
+  * For vectors, this is also equals to the square root of the dot product of \c *this with itself.
  *
  * \sa dot(), squaredNorm()
  */
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -116,7 +116,7 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
  */
 template<typename Scalar> struct scalar_min_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pmin(a,b); }
@@ -139,7 +139,7 @@ struct functor_traits<scalar_min_op<Scalar> > {
  */
 template<typename Scalar> struct scalar_max_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pmax(a,b); }
@@ -165,8 +165,10 @@ template<typename Scalar> struct scalar_hypot_op {
 //   typedef typename NumTraits<Scalar>::Real result_type;
  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
  {
-    Scalar p = std::max(_x, _y);
-    Scalar q = std::min(_x, _y);
+    using std::max;
+    using std::min;
+    Scalar p = (max)(_x, _y);
+    Scalar q = (min)(_x, _y);
    Scalar qp = q/p;
    return p * sqrt(Scalar(1) + qp*qp);
  }
@@ -218,6 +220,38 @@ struct functor_traits<scalar_quotient_op<Scalar> > {
  };
 };

+/** \internal
+  * \brief Template functor to compute the and of two booleans
+  *
+  * \sa class CwiseBinaryOp, ArrayBase::operator&&
+  */
+struct scalar_boolean_and_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
+  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+};
+template<> struct functor_traits<scalar_boolean_and_op> {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the or of two booleans
+  *
+  * \sa class CwiseBinaryOp, ArrayBase::operator||
+  */
+struct scalar_boolean_or_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
+  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+};
+template<> struct functor_traits<scalar_boolean_or_op> {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
 // unary functors:

 /** \internal
@@ -605,7 +639,7 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
  EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
  {
    eigen_assert(col==0 || row==0);
-    return impl(col + row);
+    return impl.packetOp(col + row);
  }

  // This proxy object handles the actual required temporaries, the different
@@ -750,9 +784,9 @@ struct functor_traits<scalar_acos_op<Scalar> >
  */
 template<typename Scalar> struct scalar_asin_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
-  inline const Scalar operator() (const Scalar& a) const { return acos(a); }
+  inline const Scalar operator() (const Scalar& a) const { return asin(a); }
  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+  inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_asin_op<Scalar> >
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -34,9 +34,10 @@ struct isApprox_selector
 {
  static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
  {
+    using std::min;
    const typename internal::nested<Derived,2>::type nested(x);
    const typename internal::nested<OtherDerived,2>::type otherNested(y);
-    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * std::min(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
+    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
  }
 };

--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -0,0 +1,624 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_GENERAL_PRODUCT_H
+#define EIGEN_GENERAL_PRODUCT_H
+
+/** \class GeneralProduct
+  * \ingroup Core_Module
+  *
+  * \brief Expression of the product of two general matrices or vectors
+  *
+  * \param LhsNested the type used to store the left-hand side
+  * \param RhsNested the type used to store the right-hand side
+  * \param ProductMode the type of the product
+  *
+  * This class represents an expression of the product of two general matrices.
+  * We call a general matrix, a dense matrix with full storage. For instance,
+  * This excludes triangular, selfadjoint, and sparse matrices.
+  * It is the return type of the operator* between general matrices. Its template
+  * arguments are determined automatically by ProductReturnType. Therefore,
+  * GeneralProduct should never be used direclty. To determine the result type of a
+  * function which involves a matrix product, use ProductReturnType::Type.
+  *
+  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+  */
+template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
+class GeneralProduct;
+
+enum {
+  Large = 2,
+  Small = 3
+};
+
+namespace internal {
+
+template<int Rows, int Cols, int Depth> struct product_type_selector;
+
+template<int Size, int MaxSize> struct product_size_category
+{
+  enum { is_large = MaxSize == Dynamic ||
+                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
+         value = is_large  ? Large
+               : Size == 1 ? 1
+                           : Small
+  };
+};
+
+template<typename Lhs, typename Rhs> struct product_type
+{
+  typedef typename remove_all<Lhs>::type _Lhs;
+  typedef typename remove_all<Rhs>::type _Rhs;
+  enum {
+    MaxRows  = _Lhs::MaxRowsAtCompileTime,
+    Rows  = _Lhs::RowsAtCompileTime,
+    MaxCols  = _Rhs::MaxColsAtCompileTime,
+    Cols  = _Rhs::ColsAtCompileTime,
+    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
+                                           _Rhs::MaxRowsAtCompileTime),
+    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
+                                        _Rhs::RowsAtCompileTime),
+    LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+  };
+
+  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
+  // is to work around an internal compiler error with gcc 4.1 and 4.2.
+private:
+  enum {
+    rows_select = product_size_category<Rows,MaxRows>::value,
+    cols_select = product_size_category<Cols,MaxCols>::value,
+    depth_select = product_size_category<Depth,MaxDepth>::value
+  };
+  typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+
+public:
+  enum {
+    value = selector::ret
+  };
+#ifdef EIGEN_DEBUG_PRODUCT
+  static void debug()
+  {
+      EIGEN_DEBUG_VAR(Rows);
+      EIGEN_DEBUG_VAR(Cols);
+      EIGEN_DEBUG_VAR(Depth);
+      EIGEN_DEBUG_VAR(rows_select);
+      EIGEN_DEBUG_VAR(cols_select);
+      EIGEN_DEBUG_VAR(depth_select);
+      EIGEN_DEBUG_VAR(value);
+  }
+#endif
+};
+
+
+/* The following allows to select the kind of product at compile time
+ * based on the three dimensions of the product.
+ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
+// FIXME I'm not sure the current mapping is the ideal one.
+template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
+template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
+template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
+template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
+template<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
+template<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };
+
+} // end namespace internal
+
+/** \class ProductReturnType
+  * \ingroup Core_Module
+  *
+  * \brief Helper class to get the correct and optimized returned type of operator*
+  *
+  * \param Lhs the type of the left-hand side
+  * \param Rhs the type of the right-hand side
+  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
+  *
+  * This class defines the typename Type representing the optimized product expression
+  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
+  * is the recommended way to define the result type of a function returning an expression
+  * which involve a matrix product. The class Product should never be
+  * used directly.
+  *
+  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+  */
+template<typename Lhs, typename Rhs, int ProductType>
+struct ProductReturnType
+{
+  // TODO use the nested type to reduce instanciations ????
+//   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+//   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+
+  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
+};
+
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
+{
+  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
+};
+
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{
+  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
+};
+
+// this is a workaround for sun CC
+template<typename Lhs, typename Rhs>
+struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{};
+
+/***********************************************************************
+*  Implementation of Inner Vector Vector Product
+***********************************************************************/
+
+// FIXME : maybe the "inner product" could return a Scalar
+// instead of a 1x1 matrix ??
+// Pro: more natural for the user
+// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
+// product ends up to a row-vector times col-vector product... To tackle this use
+// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
+
+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
+ : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
+{};
+
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, InnerProduct>
+  : internal::no_assignment_operator,
+    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
+{
+    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
+  public:
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+    }
+
+    /** Convertion to scalar */
+    operator const typename Base::Scalar() const {
+      return Base::coeff(0,0);
+    }
+};
+
+/***********************************************************************
+*  Implementation of Outer Vector Vector Product
+***********************************************************************/
+
+namespace internal {
+template<int StorageOrder> struct outer_product_selector;
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
+{};
+
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, OuterProduct>
+  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
+{
+  public:
+    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    }
+
+    template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+    {
+      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
+    }
+};
+
+namespace internal {
+
+template<> struct outer_product_selector<ColMajor> {
+  template<typename ProductType, typename Dest>
+  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
+    typedef typename Dest::Index Index;
+    // FIXME make sure lhs is sequentially stored
+    // FIXME not very good if rhs is real and lhs complex while alpha is real too
+    const Index cols = dest.cols();
+    for (Index j=0; j<cols; ++j)
+      dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
+  }
+};
+
+template<> struct outer_product_selector<RowMajor> {
+  template<typename ProductType, typename Dest>
+  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
+    typedef typename Dest::Index Index;
+    // FIXME make sure rhs is sequentially stored
+    // FIXME not very good if lhs is real and rhs complex while alpha is real too
+    const Index rows = dest.rows();
+    for (Index i=0; i<rows; ++i)
+      dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
+  }
+};
+
+} // end namespace internal
+
+/***********************************************************************
+*  Implementation of General Matrix Vector Product
+***********************************************************************/
+
+/*  According to the shape/flags of the matrix we have to distinghish 3 different cases:
+ *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
+ *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
+ *   3 - all other cases are handled using a simple loop along the outer-storage direction.
+ *  Therefore we need a lower level meta selector.
+ *  Furthermore, if the matrix is the rhs, then the product has to be transposed.
+ */
+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
+{};
+
+template<int Side, int StorageOrder, bool BlasCompatible>
+struct gemv_selector;
+
+} // end namespace internal
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, GemvProduct>
+  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
+{
+  public:
+    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+    typedef typename Lhs::Scalar LhsScalar;
+    typedef typename Rhs::Scalar RhsScalar;
+
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+    {
+//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
+//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    }
+
+    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
+
+    template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+    {
+      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
+      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
+    }
+};
+
+namespace internal {
+
+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    Transpose<Dest> destT(dest);
+    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
+        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
+  }
+};
+
+template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
+{
+  EIGEN_STRONG_INLINE  Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+};
+
+template<typename Scalar,int Size>
+struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
+{
+  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+};
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
+{
+  #if EIGEN_ALIGN_STATICALLY
+  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
+  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
+  #else
+  // Some architectures cannot align on the stack,
+  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
+  enum {
+    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
+    PacketSize      = internal::packet_traits<Scalar>::size
+  };
+  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
+  EIGEN_STRONG_INLINE Scalar* data() {
+    return ForceAlignment
+            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
+            : m_data.array;
+  }
+  #endif
+};
+
+template<> struct gemv_selector<OnTheRight,ColMajor,true>
+{
+  template<typename ProductType, typename Dest>
+  static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename ProductType::Index Index;
+    typedef typename ProductType::LhsScalar   LhsScalar;
+    typedef typename ProductType::RhsScalar   RhsScalar;
+    typedef typename ProductType::Scalar      ResScalar;
+    typedef typename ProductType::RealScalar  RealScalar;
+    typedef typename ProductType::ActualLhsType ActualLhsType;
+    typedef typename ProductType::ActualRhsType ActualRhsType;
+    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+
+    const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
+    const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on, the other hand it is good for the cache to pack the vector anyways...
+      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+    };
+
+    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+
+    bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
+    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+    
+    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  evalToDest ? dest.data() : static_dest.data());
+    
+    if(!evalToDest)
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      int size = dest.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      if(!alphaIsCompatible)
+      {
+        MappedDest(actualDestPtr, dest.size()).setZero();
+        compatibleAlpha = RhsScalar(1);
+      }
+      else
+        MappedDest(actualDestPtr, dest.size()) = dest;
+    }
+
+    general_matrix_vector_product
+      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
+        actualLhs.rows(), actualLhs.cols(),
+        &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
+        actualRhs.data(), actualRhs.innerStride(),
+        actualDestPtr, 1,
+        compatibleAlpha);
+
+    if (!evalToDest)
+    {
+      if(!alphaIsCompatible)
+        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
+      else
+        dest = MappedDest(actualDestPtr, dest.size());
+    }
+  }
+};
+
+template<> struct gemv_selector<OnTheRight,RowMajor,true>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename ProductType::LhsScalar LhsScalar;
+    typedef typename ProductType::RhsScalar RhsScalar;
+    typedef typename ProductType::Scalar    ResScalar;
+    typedef typename ProductType::Index Index;
+    typedef typename ProductType::ActualLhsType ActualLhsType;
+    typedef typename ProductType::ActualRhsType ActualRhsType;
+    typedef typename ProductType::_ActualRhsType _ActualRhsType;
+    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on, the other hand it is good for the cache to pack the vector anyways...
+      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+    };
+
+    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+
+    if(!DirectlyUseRhs)
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      int size = actualRhs.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+    }
+
+    general_matrix_vector_product
+      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
+        actualLhs.rows(), actualLhs.cols(),
+        &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
+        actualRhsPtr, 1,
+        &dest.coeffRef(0,0), dest.innerStride(),
+        actualAlpha);
+  }
+};
+
+template<> struct gemv_selector<OnTheRight,ColMajor,false>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename Dest::Index Index;
+    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
+    const Index size = prod.rhs().rows();
+    for(Index k=0; k<size; ++k)
+      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
+  }
+};
+
+template<> struct gemv_selector<OnTheRight,RowMajor,false>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename Dest::Index Index;
+    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
+    const Index rows = prod.rows();
+    for(Index i=0; i<rows; ++i)
+      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
+  }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** \returns the matrix product of \c *this and \a other.
+  *
+  * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
+  *
+  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline const typename ProductReturnType<Derived,OtherDerived>::Type
+MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
+{
+  // A note regarding the function declaration: In MSVC, this function will sometimes
+  // not be inlined since DenseStorage is an unwindable object for dynamic
+  // matrices and product types are holding a member to store the result.
+  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
+  enum {
+    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
+                   || OtherDerived::RowsAtCompileTime==Dynamic
+                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+  internal::product_type<Derived,OtherDerived>::debug();
+#endif
+  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+}
+
+/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
+  *
+  * The returned product will behave like any other expressions: the coefficients of the product will be
+  * computed once at a time as requested. This might be useful in some extremely rare cases when only
+  * a small and no coherent fraction of the result's coefficients have to be computed.
+  *
+  * \warning This version of the matrix product can be much much slower. So use it only if you know
+  * what you are doing and that you measured a true speed improvement.
+  *
+  * \sa operator*(const MatrixBase&)
+  */
+template<typename Derived>
+template<typename OtherDerived>
+const typename LazyProductReturnType<Derived,OtherDerived>::Type
+MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
+{
+  enum {
+    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
+                   || OtherDerived::RowsAtCompileTime==Dynamic
+                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+
+  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+}
+
+#endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -134,12 +134,12 @@ pdiv(const Packet& a,
 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
 template<typename Packet> inline Packet
 pmin(const Packet& a,
-        const Packet& b) { return std::min(a, b); }
+        const Packet& b) { using std::min; return (min)(a, b); }

 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
 template<typename Packet> inline Packet
 pmax(const Packet& a,
-        const Packet& b) { return std::max(a, b); }
+        const Packet& b) { using std::max; return (max)(a, b); }

 /** \internal \returns the absolute value of \a a */
 template<typename Packet> inline Packet
@@ -286,7 +286,7 @@ pmadd(const Packet&  a,
 { return padd(pmul(a, b),c); }

 /** \internal \returns a packet version of \a *from.
-  * \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
+  * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
 template<typename Packet, int LoadMode>
 inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
 {
@@ -297,7 +297,7 @@ inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
 }

 /** \internal copy the packet \a from to \a *to.
-  * If StoreMode equals Aligned, \a to must be 16 bytes aligned */
+  * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
 template<typename Scalar, typename Packet, int LoadMode>
 inline void pstoret(Scalar* to, const Packet& from)
 {
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -141,7 +141,8 @@ struct significant_decimals_default_impl
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline int run()
  {
-    return cast<RealScalar,int>(std::ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
+    using std::ceil;
+    return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
  }
 };

--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -31,10 +31,10 @@
  *
  * \brief A matrix or vector expression mapping an existing array of data.
  *
-  * \param PlainObjectType the equivalent matrix type of the mapped data
-  * \param MapOptions specifies whether the pointer is \c Aligned, or \c Unaligned.
-  *                The default is \c Unaligned.
-  * \param StrideType optionnally specifies strides. By default, Map assumes the memory layout
+  * \tparam PlainObjectType the equivalent matrix type of the mapped data
+  * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
+  *                The default is \c #Unaligned.
+  * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
  *                   of an ordinary, contiguous array. This can be overridden by specifying strides.
  *                   The type passed here must be a specialization of the Stride template, see examples below.
  *
@@ -72,9 +72,9 @@
  * Example: \include Map_placement_new.cpp
  * Output: \verbinclude Map_placement_new.out
  *
-  * This class is the return type of Matrix::Map() but can also be used directly.
+  * This class is the return type of PlainObjectBase::Map() but can also be used directly.
  *
-  * \sa Matrix::Map(), \ref TopicStorageOrders
+  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
  */

 namespace internal {
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -170,8 +170,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
                                        internal::inner_stride_at_compile_time<Derived>::ret==1),
                          PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
-      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*internal::packet_traits<Scalar>::size)) == 0)
-        && "data is not aligned");
+      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
+                   && "data is not aligned");
    }

    PointerType m_data;
@@ -238,7 +238,7 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
                (this->m_data + index * innerStride(), x);
    }

-    inline MapBase(PointerType data) : Base(data) {}
+    explicit inline MapBase(PointerType data) : Base(data) {}
    inline MapBase(PointerType data, Index size) : Base(data, size) {}
    inline MapBase(PointerType data, Index rows, Index cols) : Base(data, rows, cols) {}

--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -87,7 +87,8 @@ struct real_impl<std::complex<RealScalar> >
 {
  static inline RealScalar run(const std::complex<RealScalar>& x)
  {
-    return std::real(x);
+    using std::real;
+    return real(x);
  }
 };

@@ -122,7 +123,8 @@ struct imag_impl<std::complex<RealScalar> >
 {
  static inline RealScalar run(const std::complex<RealScalar>& x)
  {
-    return std::imag(x);
+    using std::imag;
+    return imag(x);
  }
 };

@@ -244,7 +246,8 @@ struct conj_impl<std::complex<RealScalar> >
 {
  static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x)
  {
-    return std::conj(x);
+    using std::conj;
+    return conj(x);
  }
 };

@@ -270,7 +273,8 @@ struct abs_impl
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x)
  {
-    return std::abs(x);
+    using std::abs;
+    return abs(x);
  }
 };

@@ -305,7 +309,7 @@ struct abs2_impl<std::complex<RealScalar> >
 {
  static inline RealScalar run(const std::complex<RealScalar>& x)
  {
-    return std::norm(x);
+    return real(x)*real(x) + imag(x)*imag(x);
  }
 };

@@ -369,10 +373,12 @@ struct hypot_impl
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x, const Scalar& y)
  {
+    using std::max;
+    using std::min;
    RealScalar _x = abs(x);
    RealScalar _y = abs(y);
-    RealScalar p = std::max(_x, _y);
-    RealScalar q = std::min(_x, _y);
+    RealScalar p = (max)(_x, _y);
+    RealScalar q = (min)(_x, _y);
    RealScalar qp = q/p;
    return p * sqrt(RealScalar(1) + qp*qp);
  }
@@ -420,7 +426,8 @@ struct sqrt_default_impl
 {
  static inline Scalar run(const Scalar& x)
  {
-    return std::sqrt(x);
+    using std::sqrt;
+    return sqrt(x);
  }
 };

@@ -460,7 +467,7 @@ inline EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)
 // This macro instanciate all the necessary template mechanism which is common to all unary real functions.
 #define EIGEN_MATHFUNC_STANDARD_REAL_UNARY(NAME) \
  template<typename Scalar, bool IsInteger> struct NAME##_default_impl {            \
-    static inline Scalar run(const Scalar& x) { return std::NAME(x); }              \
+    static inline Scalar run(const Scalar& x) { using std::NAME; return NAME(x); }  \
  };                                                                                \
  template<typename Scalar> struct NAME##_default_impl<Scalar, true> {              \
    static inline Scalar run(const Scalar&) {                                       \
@@ -495,7 +502,8 @@ struct atan2_default_impl
  typedef Scalar retval;
  static inline Scalar run(const Scalar& x, const Scalar& y)
  {
-    return std::atan2(x, y);
+    using std::atan2;
+    return atan2(x, y);
  }
 };

@@ -534,7 +542,8 @@ struct pow_default_impl
  typedef Scalar retval;
  static inline Scalar run(const Scalar& x, const Scalar& y)
  {
-    return std::pow(x, y);
+    using std::pow;
+    return pow(x, y);
  }
 };

@@ -726,7 +735,8 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
  }
  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
-    return abs(x - y) <= std::min(abs(x), abs(y)) * prec;
+    using std::min;
+    return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
  }
  static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
@@ -764,7 +774,8 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
  }
  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
-    return abs2(x - y) <= std::min(abs2(x), abs2(y)) * prec * prec;
+    using std::min;
+    return abs2(x - y) <= (min)(abs2(x), abs2(y)) * prec * prec;
  }
 };

--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -43,8 +43,8 @@
  * \tparam _Cols Number of columns, or \b Dynamic
  *
  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
-  * \tparam _Options \anchor matrix_tparam_options A combination of either \b RowMajor or \b ColMajor, and of either
-  *                 \b AutoAlign or \b DontAlign.
+  * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
+  *                 \b #AutoAlign or \b #DontAlign.
  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
  *                 for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
  * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
@@ -153,10 +153,6 @@ class Matrix

    typedef typename Base::PlainObject PlainObject;

-    enum { NeedsToAlign = (!(Options&DontAlign))
-                          && SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
-    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
-
    using Base::base;
    using Base::coeffRef;

@@ -415,25 +411,6 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)

 #undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
 #undef EIGEN_MAKE_TYPEDEFS
-
-#undef EIGEN_MAKE_TYPEDEFS_LARGE
-
-#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
-using Eigen::Matrix##SizeSuffix##TypeSuffix; \
-using Eigen::Vector##SizeSuffix##TypeSuffix; \
-using Eigen::RowVector##SizeSuffix##TypeSuffix;
-
-#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
-
-#define EIGEN_USING_MATRIX_TYPEDEFS \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
+#undef EIGEN_MAKE_FIXED_TYPEDEFS

 #endif // EIGEN_MATRIX_H
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -111,7 +111,7 @@ template<typename Derived> class MatrixBase

    /** \returns the size of the main diagonal, which is min(rows(),cols()).
      * \sa rows(), cols(), SizeAtCompileTime. */
-    inline Index diagonalSize() const { return std::min(rows(),cols()); }
+    inline Index diagonalSize() const { return (std::min)(rows(),cols()); }

    /** \brief The plain matrix type corresponding to this expression.
      *
@@ -465,6 +465,8 @@ template<typename Derived> class MatrixBase
    const MatrixFunctionReturnValue<Derived> sinh() const;
    const MatrixFunctionReturnValue<Derived> cos() const;
    const MatrixFunctionReturnValue<Derived> sin() const;
+    const MatrixSquareRootReturnValue<Derived> sqrt() const;
+    const MatrixLogarithmReturnValue<Derived> log() const;

 #ifdef EIGEN2_SUPPORT
    template<typename ProductDerived, typename Lhs, typename Rhs>
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -87,8 +87,8 @@ template<typename T> struct GenericNumTraits
    // make sure to override this for floating-point types
    return Real(0);
  }
-  inline static T highest() { return std::numeric_limits<T>::max(); }
-  inline static T lowest()  { return IsInteger ? std::numeric_limits<T>::min() : (-std::numeric_limits<T>::max()); }
+  inline static T highest() { return (std::numeric_limits<T>::max)(); }
+  inline static T lowest()  { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
  
 #ifdef EIGEN2_SUPPORT
  enum {
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -34,6 +34,19 @@

 namespace internal {

+template<typename Index>
+EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
+{
+  // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
+  // we assume Index is signed
+  Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
+  bool error = (rows < 0  || cols < 0)  ? true
+             : (rows == 0 || cols == 0) ? false
+                                        : (rows > max_index / cols);
+  if (error)
+    throw_std_bad_alloc();
+}
+
 template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;

 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
@@ -84,14 +97,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
-    

  protected:
    DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;

  public:
-    enum { NeedsToAlign = (!(Options&DontAlign))
-                          && SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
+    enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

    Base& base() { return *static_cast<Base*>(this); }
@@ -200,11 +211,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
    {
      #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+        internal::check_rows_cols_for_overflow(rows, cols);
        Index size = rows*cols;
        bool size_changed = size != this->size();
        m_storage.resize(size, rows, cols);
        if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
      #else
+        internal::check_rows_cols_for_overflow(rows, cols);
        m_storage.resize(rows*cols, rows, cols);
      #endif
    }
@@ -273,6 +286,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
    {
      const OtherDerived& other = _other.derived();
+      internal::check_rows_cols_for_overflow(other.rows(), other.cols());
      const Index othersize = other.rows()*other.cols();
      if(RowsAtCompileTime == 1)
      {
@@ -417,6 +431,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
    {
      _check_template_params();
+      internal::check_rows_cols_for_overflow(other.derived().rows(), other.derived().cols());
      Base::operator=(other.derived());
    }

@@ -425,9 +440,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
      * \a data pointers.
      *
-      * These methods do not allow to specify strides. If you need to specify strides, you have to
-      * use the Map class directly.
-      *
      * \see class Map
      */
    //@{
@@ -582,8 +594,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    template<typename T0, typename T1>
    EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
    {
+      EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
+                          bool(NumTraits<T1>::IsInteger),
+                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
      eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
             && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+      internal::check_rows_cols_for_overflow(rows, cols);      
      m_storage.resize(rows*cols,rows,cols);
      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }
@@ -641,14 +657,15 @@ struct internal::conservative_resize_like_impl
    if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
         (!Derived::IsRowMajor && _this.rows() == rows) )  // column-major and we change only the number of columns
    {
+      internal::check_rows_cols_for_overflow(rows, cols);
      _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
    }
    else
    {
      // The storage order does not allow us to use reallocation.
      typename Derived::PlainObject tmp(rows,cols);
-      const Index common_rows = std::min(rows, _this.rows());
-      const Index common_cols = std::min(cols, _this.cols());
+      const Index common_rows = (std::min)(rows, _this.rows());
+      const Index common_cols = (std::min)(cols, _this.cols());
      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
      _this.derived().swap(tmp);
    }
@@ -681,8 +698,8 @@ struct internal::conservative_resize_like_impl
    {
      // The storage order does not allow us to use reallocation.
      typename Derived::PlainObject tmp(other);
-      const Index common_rows = std::min(tmp.rows(), _this.rows());
-      const Index common_cols = std::min(tmp.cols(), _this.cols());
+      const Index common_rows = (std::min)(tmp.rows(), _this.rows());
+      const Index common_cols = (std::min)(tmp.cols(), _this.cols());
      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
      _this.derived().swap(tmp);
    }
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -1,8 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,603 +25,103 @@
 #ifndef EIGEN_PRODUCT_H
 #define EIGEN_PRODUCT_H

-/** \class GeneralProduct
+template<typename Lhs, typename Rhs> class Product;
+template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
+
+/** \class Product
  * \ingroup Core_Module
  *
-  * \brief Expression of the product of two general matrices or vectors
+  * \brief Expression of the product of two arbitrary matrices or vectors
  *
-  * \param LhsNested the type used to store the left-hand side
-  * \param RhsNested the type used to store the right-hand side
-  * \param ProductMode the type of the product
+  * \param Lhs the type of the left-hand side expression
+  * \param Rhs the type of the right-hand side expression
  *
-  * This class represents an expression of the product of two general matrices.
-  * We call a general matrix, a dense matrix with full storage. For instance,
-  * This excludes triangular, selfadjoint, and sparse matrices.
-  * It is the return type of the operator* between general matrices. Its template
-  * arguments are determined automatically by ProductReturnType. Therefore,
-  * GeneralProduct should never be used direclty. To determine the result type of a
-  * function which involves a matrix product, use ProductReturnType::Type.
+  * This class represents an expression of the product of two arbitrary matrices.
  *
-  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
  */
-template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
-class GeneralProduct;
-
-enum {
-  Large = 2,
-  Small = 3
-};

 namespace internal {
-
-template<int Rows, int Cols, int Depth> struct product_type_selector;
-
-template<int Size, int MaxSize> struct product_size_category
+template<typename Lhs, typename Rhs>
+struct traits<Product<Lhs, Rhs> >
 {
-  enum { is_large = MaxSize == Dynamic ||
-                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
-         value = is_large  ? Large
-               : Size == 1 ? 1
-                           : Small
+  typedef MatrixXpr XprKind;
+  typedef typename remove_all<Lhs>::type LhsCleaned;
+  typedef typename remove_all<Rhs>::type RhsCleaned;
+  typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
+  typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
+                                        typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
+  typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
+                                      typename traits<RhsCleaned>::Index>::type Index;
+  enum {
+    RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
+    ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
+    MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
+    Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
+    CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
  };
 };
-
-template<typename Lhs, typename Rhs> struct product_type
-{
-  typedef typename remove_all<Lhs>::type _Lhs;
-  typedef typename remove_all<Rhs>::type _Rhs;
-  enum {
-    MaxRows  = _Lhs::MaxRowsAtCompileTime,
-    Rows  = _Lhs::RowsAtCompileTime,
-    MaxCols  = _Rhs::MaxColsAtCompileTime,
-    Cols  = _Rhs::ColsAtCompileTime,
-    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
-                                           _Rhs::MaxRowsAtCompileTime),
-    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
-                                        _Rhs::RowsAtCompileTime),
-    LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
-  };
-
-  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
-  // is to work around an internal compiler error with gcc 4.1 and 4.2.
-private:
-  enum {
-    rows_select = product_size_category<Rows,MaxRows>::value,
-    cols_select = product_size_category<Cols,MaxCols>::value,
-    depth_select = product_size_category<Depth,MaxDepth>::value
-  };
-  typedef product_type_selector<rows_select, cols_select, depth_select> selector;
-
-public:
-  enum {
-    value = selector::ret
-  };
-#ifdef EIGEN_DEBUG_PRODUCT
-  static void debug()
-  {
-      EIGEN_DEBUG_VAR(Rows);
-      EIGEN_DEBUG_VAR(Cols);
-      EIGEN_DEBUG_VAR(Depth);
-      EIGEN_DEBUG_VAR(rows_select);
-      EIGEN_DEBUG_VAR(cols_select);
-      EIGEN_DEBUG_VAR(depth_select);
-      EIGEN_DEBUG_VAR(value);
-  }
-#endif
-};
-
-
-/* The following allows to select the kind of product at compile time
- * based on the three dimensions of the product.
- * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
-// FIXME I'm not sure the current mapping is the ideal one.
-template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
-template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
-template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
-template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
-template<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
-template<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
-template<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
-template<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };
-
 } // end namespace internal

-/** \class ProductReturnType
-  * \ingroup Core_Module
-  *
-  * \brief Helper class to get the correct and optimized returned type of operator*
-  *
-  * \param Lhs the type of the left-hand side
-  * \param Rhs the type of the right-hand side
-  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
-  *
-  * This class defines the typename Type representing the optimized product expression
-  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
-  * is the recommended way to define the result type of a function returning an expression
-  * which involve a matrix product. The class Product should never be
-  * used directly.
-  *
-  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
-  */
-template<typename Lhs, typename Rhs, int ProductType>
-struct ProductReturnType
-{
-  // TODO use the nested type to reduce instanciations ????
-//   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
-//   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
-
-  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
-};

 template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
-};
-
-// this is a workaround for sun CC
-template<typename Lhs, typename Rhs>
-struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{};
-
-/***********************************************************************
-*  Implementation of Inner Vector Vector Product
-***********************************************************************/
-
-// FIXME : maybe the "inner product" could return a Scalar
-// instead of a 1x1 matrix ??
-// Pro: more natural for the user
-// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
-// product ends up to a row-vector times col-vector product... To tackle this use
-// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
-
-namespace internal {
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
- : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, InnerProduct>
-  : internal::no_assignment_operator,
-    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
-{
-    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
-  public:
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
-      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
-    }
-
-    /** Convertion to scalar */
-    operator const typename Base::Scalar() const {
-      return Base::coeff(0,0);
-    }
-};
-
-/***********************************************************************
-*  Implementation of Outer Vector Vector Product
-***********************************************************************/
-
-namespace internal {
-template<int StorageOrder> struct outer_product_selector;
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, OuterProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
+class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+                                                                            typename internal::traits<Rhs>::StorageKind>::ret>
 {
  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-
-    template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
-    {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
-    }
-};
-
-namespace internal {
-
-template<> struct outer_product_selector<ColMajor> {
-  template<typename ProductType, typename Dest>
-  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
-    typedef typename Dest::Index Index;
-    // FIXME make sure lhs is sequentially stored
-    // FIXME not very good if rhs is real and lhs complex while alpha is real too
-    const Index cols = dest.cols();
-    for (Index j=0; j<cols; ++j)
-      dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
-  }
-};
-
-template<> struct outer_product_selector<RowMajor> {
-  template<typename ProductType, typename Dest>
-  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
-    typedef typename Dest::Index Index;
-    // FIXME make sure rhs is sequentially stored
-    // FIXME not very good if lhs is real and rhs complex while alpha is real too
-    const Index rows = dest.rows();
-    for (Index i=0; i<rows; ++i)
-      dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
-  }
-};
-
-} // end namespace internal
-
-/***********************************************************************
-*  Implementation of General Matrix Vector Product
-***********************************************************************/
-
-/*  According to the shape/flags of the matrix we have to distinghish 3 different cases:
- *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
- *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
- *   3 - all other cases are handled using a simple loop along the outer-storage direction.
- *  Therefore we need a lower level meta selector.
- *  Furthermore, if the matrix is the rhs, then the product has to be transposed.
- */
-namespace internal {
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
-{};
-
-template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_selector;
-
-} // end namespace internal
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, GemvProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
-{
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    typedef typename Lhs::Scalar LhsScalar;
-    typedef typename Rhs::Scalar RhsScalar;
-
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {
-//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
-//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-
-    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
-    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
-
-    template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
-    {
-      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
-      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
-                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
-    }
-};
-
-namespace internal {
-
-// The vector is on the left => transposition
-template<int StorageOrder, bool BlasCompatible>
-struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    Transpose<Dest> destT(dest);
-    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
-      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
-        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
-  }
-};
-
-template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
-
-template<typename Scalar,int Size,int MaxSize>
-struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
-{
-  EIGEN_STRONG_INLINE  Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
-};
-
-template<typename Scalar,int Size>
-struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
-{
-  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
-};
-
-template<typename Scalar,int Size,int MaxSize>
-struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
-{
-  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
-  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
-};
-
-template<> struct gemv_selector<OnTheRight,ColMajor,true>
-{
-  template<typename ProductType, typename Dest>
-  static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::LhsScalar   LhsScalar;
-    typedef typename ProductType::RhsScalar   RhsScalar;
-    typedef typename ProductType::Scalar      ResScalar;
-    typedef typename ProductType::RealScalar  RealScalar;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
-
-    const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
-    const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
-
-    enum {
-      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
-      // on, the other hand it is good for the cache to pack the vector anyways...
-      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
-      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
-      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
-    };
-
-    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
-
-    bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
-    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
    
-    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+    typedef typename ProductImpl<
+        Lhs, Rhs,
+        typename internal::promote_storage_type<typename Lhs::StorageKind,
+                                                typename Rhs::StorageKind>::ret>::Base Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)

-    ResScalar* actualDestPtr;
-    bool freeDestPtr = false;
-    if (evalToDest)
+    typedef typename Lhs::Nested LhsNested;
+    typedef typename Rhs::Nested RhsNested;
+    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+
+    Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
    {
-      actualDestPtr = &dest.coeffRef(0);
-    }
-    else
-    {
-      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      int size = dest.size();
-      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      #endif
-      if((actualDestPtr = static_dest.data())==0)
-      {
-        freeDestPtr = true;
-        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
-      }
-      if(!alphaIsCompatible)
-      {
-        MappedDest(actualDestPtr, dest.size()).setZero();
-        compatibleAlpha = RhsScalar(1);
-      }
-      else
-        MappedDest(actualDestPtr, dest.size()) = dest;
+      eigen_assert(lhs.cols() == rhs.rows()
+        && "invalid matrix product"
+        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }

-    general_matrix_vector_product
-      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
-        actualLhs.rows(), actualLhs.cols(),
-        &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
-        actualRhs.data(), actualRhs.innerStride(),
-        actualDestPtr, 1,
-        compatibleAlpha);
+    inline Index rows() const { return m_lhs.rows(); }
+    inline Index cols() const { return m_rhs.cols(); }

-    if (!evalToDest)
-    {
-      if(!alphaIsCompatible)
-        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
-      else
-        dest = MappedDest(actualDestPtr, dest.size());
-      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
-    }
-  }
+    const LhsNestedCleaned& lhs() const { return m_lhs; }
+    const RhsNestedCleaned& rhs() const { return m_rhs; }
+
+  protected:
+
+    const LhsNested m_lhs;
+    const RhsNested m_rhs;
 };

-template<> struct gemv_selector<OnTheRight,RowMajor,true>
+template<typename Lhs, typename Rhs>
+class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
 {
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename ProductType::LhsScalar LhsScalar;
-    typedef typename ProductType::RhsScalar RhsScalar;
-    typedef typename ProductType::Scalar    ResScalar;
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::_ActualRhsType _ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+    typedef Product<Lhs, Rhs> Derived;
+  public:

-    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
-
-    enum {
-      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
-      // on, the other hand it is good for the cache to pack the vector anyways...
-      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
-    };
-
-    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
-
-    RhsScalar* actualRhsPtr;
-    bool freeRhsPtr = false;
-    if (DirectlyUseRhs)
-    {
-      actualRhsPtr = const_cast<RhsScalar*>(&actualRhs.coeffRef(0));
-    }
-    else
-    {
-      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      int size = actualRhs.size();
-      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      #endif
-      if((actualRhsPtr = static_rhs.data())==0)
-      {
-        freeRhsPtr = true;
-        actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
-      }
-      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
-    }
-
-    general_matrix_vector_product
-      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
-        actualLhs.rows(), actualLhs.cols(),
-        &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
-        actualRhsPtr, 1,
-        &dest.coeffRef(0,0), dest.innerStride(),
-        actualAlpha);
-
-    if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
-  }
+    typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
 };

-template<> struct gemv_selector<OnTheRight,ColMajor,false>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename Dest::Index Index;
-    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
-    const Index size = prod.rhs().rows();
-    for(Index k=0; k<size; ++k)
-      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
-  }
-};
-
-template<> struct gemv_selector<OnTheRight,RowMajor,false>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename Dest::Index Index;
-    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
-    const Index rows = prod.rows();
-    for(Index i=0; i<rows; ++i)
-      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
-  }
-};
-
-} // end namespace internal
-
 /***************************************************************************
 * Implementation of matrix base methods
 ***************************************************************************/

-/** \returns the matrix product of \c *this and \a other.
-  *
-  * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
-  *
-  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline const typename ProductReturnType<Derived,OtherDerived>::Type
-MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
-{
-  // A note regarding the function declaration: In MSVC, this function will sometimes
-  // not be inlined since DenseStorage is an unwindable object for dynamic
-  // matrices and product types are holding a member to store the result.
-  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
-  enum {
-    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
-                   || OtherDerived::RowsAtCompileTime==Dynamic
-                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
-    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
-    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
-  };
-  // note to the lost user:
-  //    * for a dot product use: v1.dot(v2)
-  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
-    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
-    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
-  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-#ifdef EIGEN_DEBUG_PRODUCT
-  internal::product_type<Derived,OtherDerived>::debug();
-#endif
-  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
-}

-/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
-  *
-  * The returned product will behave like any other expressions: the coefficients of the product will be
-  * computed once at a time as requested. This might be useful in some extremely rare cases when only
-  * a small and no coherent fraction of the result's coefficients have to be computed.
-  *
-  * \warning This version of the matrix product can be much much slower. So use it only if you know
-  * what you are doing and that you measured a true speed improvement.
-  *
-  * \sa operator*(const MatrixBase&)
+/** \internal used to test the evaluator only
  */
-template<typename Derived>
-template<typename OtherDerived>
-const typename LazyProductReturnType<Derived,OtherDerived>::Type
-MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
+template<typename Lhs,typename Rhs>
+const Product<Lhs,Rhs>
+prod(const Lhs& lhs, const Rhs& rhs)
 {
-  enum {
-    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
-                   || OtherDerived::RowsAtCompileTime==Dynamic
-                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
-    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
-    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
-  };
-  // note to the lost user:
-  //    * for a dot product use: v1.dot(v2)
-  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
-    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
-    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
-  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-
-  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+  return Product<Lhs,Rhs>(lhs,rhs);
 }

 #endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -256,16 +256,16 @@ class ScaledProduct
    : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}

    template<typename Dest>
-    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,m_alpha); }
+    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }

    template<typename Dest>
-    inline void addTo(Dest& dst) const { scaleAndAddTo(dst,m_alpha); }
+    inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }

    template<typename Dest>
-    inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-m_alpha); }
+    inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }

    template<typename Dest>
-    inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }
+    inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha * m_alpha); }

    const Scalar& alpha() const { return m_alpha; }
    
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -219,15 +219,28 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
      alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
                ? Aligned : Unaligned
    };
-    const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
-    const Index alignedEnd = alignedStart + alignedSize;
+    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
+    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
+    const Index alignedEnd2 = alignedStart + alignedSize2;
+    const Index alignedEnd  = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
-      PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
-      for(Index index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
-        packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
-      res = func.predux(packet_res);
+      PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
+      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
+      {
+        PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
+        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
+        {
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
+          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
+        }
+
+        packet_res0 = func.packetOp(packet_res0,packet_res1);
+        if(alignedEnd>alignedEnd2)
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
+      }
+      res = func.predux(packet_res0);

      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -122,6 +122,10 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
      return m_matrix.template packet<LoadMode>(actual_row, actual_col);
    }

+    const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const 
+    { 
+      return m_matrix; 
+    }

  protected:
    const typename MatrixType::Nested m_matrix;
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -183,6 +183,12 @@ template<typename MatrixType, int Direction> class Reverse
      m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
    }

+    const typename internal::remove_all<typename MatrixType::Nested>::type& 
+    nestedExpression() const 
+    {
+      return m_matrix;
+    }
+
  protected:
    const typename MatrixType::Nested m_matrix;
 };
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -101,6 +101,21 @@ class Select : internal::no_assignment_operator,
        return m_else.coeff(i);
    }

+    const ConditionMatrixType& conditionMatrix() const
+    {
+      return m_condition;
+    }
+
+    const ThenMatrixType& thenMatrix() const
+    {
+      return m_then;
+    }
+
+    const ElseMatrixType& elseMatrix() const
+    {
+      return m_else;
+    }
+
  protected:
    const typename ConditionMatrixType::Nested m_condition;
    const typename ThenMatrixType::Nested m_then;
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -32,13 +32,13 @@
  * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
  *
  * \param MatrixType the type of the dense matrix storing the coefficients
-  * \param TriangularPart can be either \c Lower or \c Upper
+  * \param TriangularPart can be either \c #Lower or \c #Upper
  *
  * This class is an expression of a sefladjoint matrix from a triangular part of a matrix
  * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
  * and most of the time this is the only way that it is used.
  *
-  * \sa class TriangularBase, MatrixBase::selfAdjointView()
+  * \sa class TriangularBase, MatrixBase::selfadjointView()
  */

 namespace internal {
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -163,6 +163,16 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
      return Base::operator=(rhs);
    }

+    Lhs& expression() const 
+    { 
+      return m_matrix;
+    }
+
+    const BinaryOp& functor() const 
+    { 
+      return m_functor;
+    }
+
  protected:
    Lhs& m_matrix;
    const BinaryOp& m_functor;
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -74,26 +74,19 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
    // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1

    bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
-    RhsScalar* actualRhs;
-    if(useRhsDirectly)
-    {
-      actualRhs = &rhs.coeffRef(0);
-    }
-    else
-    {
-      actualRhs = ei_aligned_stack_new(RhsScalar,rhs.size());
+
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
+                                                  (useRhsDirectly ? rhs.data() : 0));
+                                                  
+    if(!useRhsDirectly)
      MappedRhs(actualRhs,rhs.size()) = rhs;
-    }

    triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
                            (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
      ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);

    if(!useRhsDirectly)
-    {
      rhs = MappedRhs(actualRhs, rhs.size());
-      ei_aligned_stack_delete(RhsScalar, actualRhs, rhs.size());
-    }
  }
 };

--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -56,6 +56,7 @@ template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::stableNorm() const
 {
+  using std::min;
  const Index blockSize = 4096;
  RealScalar scale = 0;
  RealScalar invScale = 1;
@@ -68,7 +69,7 @@ MatrixBase<Derived>::stableNorm() const
  if (bi>0)
    internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
  for (; bi<n; bi+=blockSize)
-    internal::stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
+    internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
  return scale * internal::sqrt(ssq);
 }

@@ -85,6 +86,9 @@ template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::blueNorm() const
 {
+  using std::pow;
+  using std::min;
+  using std::max;
  static Index nmax = -1;
  static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
  if(nmax <= 0)
@@ -99,25 +103,25 @@ MatrixBase<Derived>::blueNorm() const
    // For portability, the PORT subprograms "ilmaeh" and "rlmach"
    // are used. For any specific computer, each of the assignment
    // statements can be replaced
-    nbig  = std::numeric_limits<Index>::max();            // largest integer
+    nbig  = (std::numeric_limits<Index>::max)();            // largest integer
    ibeta = std::numeric_limits<RealScalar>::radix;         // base for floating-point numbers
    it    = std::numeric_limits<RealScalar>::digits;        // number of base-beta digits in mantissa
    iemin = std::numeric_limits<RealScalar>::min_exponent;  // minimum exponent
    iemax = std::numeric_limits<RealScalar>::max_exponent;  // maximum exponent
-    rbig  = std::numeric_limits<RealScalar>::max();         // largest floating-point number
+    rbig  = (std::numeric_limits<RealScalar>::max)();         // largest floating-point number

    iexp  = -((1-iemin)/2);
-    b1    = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));  // lower boundary of midrange
+    b1    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));  // lower boundary of midrange
    iexp  = (iemax + 1 - it)/2;
-    b2    = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));   // upper boundary of midrange
+    b2    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // upper boundary of midrange

    iexp  = (2-iemin)/2;
-    s1m   = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for lower range
+    s1m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for lower range
    iexp  = - ((iemax+it)/2);
-    s2m   = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for upper range
+    s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for upper range

    overfl  = rbig*s2m;             // overflow boundary for abig
-    eps     = RealScalar(std::pow(double(ibeta), 1-it));
+    eps     = RealScalar(pow(double(ibeta), 1-it));
    relerr  = internal::sqrt(eps);         // tolerance for neglecting asml
    abig    = RealScalar(1.0/eps - 1.0);
    if (RealScalar(nbig)>abig)  nmax = int(abig);  // largest safe n
@@ -163,8 +167,8 @@ MatrixBase<Derived>::blueNorm() const
  }
  else
    return internal::sqrt(amed);
-  asml = std::min(abig, amed);
-  abig = std::max(abig, amed);
+  asml = (min)(abig, amed);
+  abig = (max)(abig, amed);
  if(asml <= abig*relerr)
    return abig;
  else
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -119,6 +119,8 @@ template<typename ExpressionType> class SwapWrapper
      _other.template writePacket<LoadMode>(index, tmp);
    }

+    ExpressionType& expression() const { return m_expression; }
+
  protected:
    ExpressionType& m_expression;
 };
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -350,15 +350,14 @@ struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
 template<bool DestIsTransposed, typename OtherDerived>
 struct check_transpose_aliasing_compile_time_selector
 {
-  enum { ret = blas_traits<OtherDerived>::IsTransposed != DestIsTransposed
-  };
+  enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
 };

 template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
 struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
 {
-  enum { ret =    blas_traits<DerivedA>::IsTransposed != DestIsTransposed
-               || blas_traits<DerivedB>::IsTransposed != DestIsTransposed
+  enum { ret =    bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
+               || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
  };
 };

@@ -367,7 +366,7 @@ struct check_transpose_aliasing_run_time_selector
 {
  static bool run(const Scalar* dest, const OtherDerived& src)
  {
-    return (blas_traits<OtherDerived>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
+    return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
  }
 };

--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -111,6 +111,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
      EIGEN_ONLY_USED_FOR_DEBUG(col);
      eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
      const int mode = int(Mode) & ~SelfAdjoint;
+      EIGEN_ONLY_USED_FOR_DEBUG(mode);
      eigen_assert((mode==Upper && col>=row)
                || (mode==Lower && col<=row)
                || ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
@@ -134,13 +135,13 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
  * \brief Base class for triangular part in a matrix
  *
  * \param MatrixType the type of the object in which we are taking the triangular part
-  * \param Mode the kind of triangular matrix expression to construct. Can be Upper,
-  *             Lower, UpperSelfadjoint, or LowerSelfadjoint. This is in fact a bit field;
-  *             it must have either Upper or Lower, and additionnaly it may have either
-  *             UnitDiag or Selfadjoint.
+  * \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
+  *             #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
+  *             This is in fact a bit field; it must have either #Upper or #Lower, 
+  *             and additionnaly it may have #UnitDiag or #ZeroDiag or neither.
  *
  * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
-  * matrices one should speak ok "trapezoid" parts. This class is the return type
+  * matrices one should speak of "trapezoid" parts. This class is the return type
  * of MatrixBase::triangularView() and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::triangularView()
@@ -448,6 +449,8 @@ struct triangular_assignment_selector
    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
  };
+  
+  typedef typename Derived1::Scalar Scalar;

  inline static void run(Derived1 &dst, const Derived2 &src)
  {
@@ -466,9 +469,9 @@ struct triangular_assignment_selector
    else if(ClearOpposite)
    {
      if (Mode&UnitDiag && row==col)
-        dst.coeffRef(row, col) = 1;
+        dst.coeffRef(row, col) = Scalar(1);
      else
-        dst.coeffRef(row, col) = 0;
+        dst.coeffRef(row, col) = Scalar(0);
    }
  }
 };
@@ -484,16 +487,17 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
+  typedef typename Derived1::Scalar Scalar;
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = std::min(j, dst.rows()-1);
+      Index maxi = (std::min)(j, dst.rows()-1);
      for(Index i = 0; i <= maxi; ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
        for(Index i = maxi+1; i < dst.rows(); ++i)
-          dst.coeffRef(i, j) = 0;
+          dst.coeffRef(i, j) = Scalar(0);
    }
  }
 };
@@ -508,10 +512,10 @@ struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearO
    {
      for(Index i = j; i < dst.rows(); ++i)
        dst.copyCoeff(i, j, src);
-      Index maxi = std::min(j, dst.rows());
+      Index maxi = (std::min)(j, dst.rows());
      if (ClearOpposite)
        for(Index i = 0; i < maxi; ++i)
-          dst.coeffRef(i, j) = 0;
+          dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
    }
  }
 };
@@ -524,7 +528,7 @@ struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = std::min(j, dst.rows());
+      Index maxi = (std::min)(j, dst.rows());
      for(Index i = 0; i < maxi; ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
@@ -544,10 +548,10 @@ struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic
    {
      for(Index i = j+1; i < dst.rows(); ++i)
        dst.copyCoeff(i, j, src);
-      Index maxi = std::min(j, dst.rows()-1);
+      Index maxi = (std::min)(j, dst.rows()-1);
      if (ClearOpposite)
        for(Index i = 0; i <= maxi; ++i)
-          dst.coeffRef(i, j) = 0;
+          dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
    }
  }
 };
@@ -560,7 +564,7 @@ struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, Cl
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = std::min(j, dst.rows());
+      Index maxi = (std::min)(j, dst.rows());
      for(Index i = 0; i < maxi; ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
@@ -580,7 +584,7 @@ struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, Cl
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = std::min(j, dst.rows());
+      Index maxi = (std::min)(j, dst.rows());
      for(Index i = maxi+1; i < dst.rows(); ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
@@ -756,8 +760,8 @@ typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Deriv
 /**
  * \returns an expression of a triangular view extracted from the current matrix
  *
-  * The parameter \a Mode can have the following values: \c Upper, \c StrictlyUpper, \c UnitUpper,
-  * \c Lower, \c StrictlyLower, \c UnitLower.
+  * The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
+  * \c #Lower, \c #StrictlyLower, \c #UnitLower.
  *
  * Example: \include MatrixBase_extract.cpp
  * Output: \verbinclude MatrixBase_extract.out
@@ -792,7 +796,7 @@ bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
  RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
  for(Index j = 0; j < cols(); ++j)
  {
-    Index maxi = std::min(j, rows()-1);
+    Index maxi = (std::min)(j, rows()-1);
    for(Index i = 0; i <= maxi; ++i)
    {
      RealScalar absValue = internal::abs(coeff(i,j));
@@ -824,7 +828,7 @@ bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
  RealScalar threshold = maxAbsOnLowerPart * prec;
  for(Index j = 1; j < cols(); ++j)
  {
-    Index maxi = std::min(j, rows()-1);
+    Index maxi = (std::min)(j, rows()-1);
    for(Index i = 0; i < maxi; ++i)
      if(internal::abs(coeff(i, j)) > threshold) return false;
  }
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -31,9 +31,9 @@
  *
  * \brief Generic expression of a partially reduxed matrix
  *
-  * \param MatrixType the type of the matrix we are applying the redux operation
-  * \param MemberOp type of the member functor
-  * \param Direction indicates the direction of the redux (Vertical or Horizontal)
+  * \tparam MatrixType the type of the matrix we are applying the redux operation
+  * \tparam MemberOp type of the member functor
+  * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
  *
  * This class represents an expression of a partial redux operator of a matrix.
  * It is the return type of some VectorwiseOp functions,
@@ -164,7 +164,7 @@ struct member_redux {
  * \brief Pseudo expression providing partial reduction operations
  *
  * \param ExpressionType the type of the object on which to do partial reductions
-  * \param Direction indicates the direction of the redux (Vertical or Horizontal)
+  * \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
  *
  * This class represents a pseudo expression with partial reduction features.
  * It is the return type of DenseBase::colwise() and DenseBase::rowwise()
@@ -237,7 +237,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    typename ExtendedType<OtherDerived>::Type
    extendedTo(const DenseBase<OtherDerived>& other) const
    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
+      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
+                          YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
+                          YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
      return typename ExtendedType<OtherDerived>::Type
                      (other.derived(),
                       Direction==Vertical   ? 1 : m_matrix.rows(),
@@ -418,10 +421,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    ExpressionType& operator=(const DenseBase<OtherDerived>& other)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
      //eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
-      for(Index j=0; j<subVectors(); ++j)
-        subVector(j) = other;
-      return const_cast<ExpressionType&>(m_matrix);
+      return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
    }

    /** Adds the vector \a other to each subvector of \c *this */
@@ -429,9 +431,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      for(Index j=0; j<subVectors(); ++j)
-        subVector(j) += other.derived();
-      return const_cast<ExpressionType&>(m_matrix);
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
    }

    /** Substracts the vector \a other to each subvector of \c *this */
@@ -439,8 +440,29 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      for(Index j=0; j<subVectors(); ++j)
-        subVector(j) -= other.derived();
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
+    }
+
+    /** Multiples each subvector of \c *this by the vector \a other */
+    template<typename OtherDerived>
+    ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      m_matrix *= extendedTo(other.derived());
+      return const_cast<ExpressionType&>(m_matrix);
+    }
+
+    /** Divides each subvector of \c *this by the vector \a other */
+    template<typename OtherDerived>
+    ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      m_matrix /= extendedTo(other.derived());
      return const_cast<ExpressionType&>(m_matrix);
    }

@@ -451,7 +473,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                  const typename ExtendedType<OtherDerived>::Type>
    operator+(const DenseBase<OtherDerived>& other) const
    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
      return m_matrix + extendedTo(other.derived());
    }

@@ -462,10 +485,39 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                  const typename ExtendedType<OtherDerived>::Type>
    operator-(const DenseBase<OtherDerived>& other) const
    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
      return m_matrix - extendedTo(other.derived());
    }

+    /** Returns the expression where each subvector is the product of the vector \a other
+      * by the corresponding subvector of \c *this */
+    template<typename OtherDerived> EIGEN_STRONG_INLINE
+    CwiseBinaryOp<internal::scalar_product_op<Scalar>,
+                  const ExpressionTypeNestedCleaned,
+                  const typename ExtendedType<OtherDerived>::Type>
+    operator*(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return m_matrix * extendedTo(other.derived());
+    }
+
+    /** Returns the expression where each subvector is the quotient of the corresponding
+      * subvector of \c *this by the vector \a other */
+    template<typename OtherDerived>
+    CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+                  const ExpressionTypeNestedCleaned,
+                  const typename ExtendedType<OtherDerived>::Type>
+    operator/(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return m_matrix / extendedTo(other.derived());
+    }
+
 /////////// Geometry module ///////////

    #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
@@ -509,7 +561,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
  * Example: \include MatrixBase_colwise.cpp
  * Output: \verbinclude MatrixBase_colwise.out
  *
-  * \sa rowwise(), class VectorwiseOp
+  * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
  */
 template<typename Derived>
 inline const typename DenseBase<Derived>::ConstColwiseReturnType
@@ -520,7 +572,7 @@ DenseBase<Derived>::colwise() const

 /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
  *
-  * \sa rowwise(), class VectorwiseOp
+  * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
  */
 template<typename Derived>
 inline typename DenseBase<Derived>::ColwiseReturnType
@@ -534,7 +586,7 @@ DenseBase<Derived>::colwise()
  * Example: \include MatrixBase_rowwise.cpp
  * Output: \verbinclude MatrixBase_rowwise.out
  *
-  * \sa colwise(), class VectorwiseOp
+  * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
  */
 template<typename Derived>
 inline const typename DenseBase<Derived>::ConstRowwiseReturnType
@@ -545,7 +597,7 @@ DenseBase<Derived>::rowwise() const

 /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
  *
-  * \sa colwise(), class VectorwiseOp
+  * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
  */
 template<typename Derived>
 inline typename DenseBase<Derived>::RowwiseReturnType
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -27,8 +27,8 @@

 namespace internal {

-static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
-static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
+static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
+static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);

 //---------- float ----------
 struct Packet2cf
@@ -43,6 +43,7 @@ template<> struct packet_traits<std::complex<float> >  : default_packet_traits
  typedef Packet2cf type;
  enum {
    Vectorizable = 1,
+    AlignedOnScalar = 1,
    size = 2,

    HasAdd    = 1,
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -52,6 +52,16 @@ typedef uint32x4_t  Packet4ui;
 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
  const Packet4i p4i_##NAME = pset1<Packet4i>(X)

+#if defined(__llvm__) && !defined(__clang__)
+  //Special treatment for Apple's llvm-gcc, its NEON packet types are unions
+  #define EIGEN_INIT_NEON_PACKET2(X, Y)       {{X, Y}}
+  #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
+#else
+  //Default initializer for packets
+  #define EIGEN_INIT_NEON_PACKET2(X, Y)       {X, Y}
+  #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
+#endif
+    
 #ifndef __pld
 #define __pld(x) asm volatile ( "   pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
 #endif
@@ -84,7 +94,7 @@ template<> struct packet_traits<int>    : default_packet_traits
  };
 };

-#if EIGEN_GNUC_AT_MOST(4,4)
+#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
 // workaround gcc 4.2, 4.3 and 4.4 compilatin issue
 EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
 EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
@@ -100,12 +110,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from)   {

 template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
 {
-  Packet4f countdown = { 3, 2, 1, 0 };
+  Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
  return vaddq_f32(pset1<Packet4f>(a), countdown);
 }
 template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
 {
-  Packet4i countdown = { 3, 2, 1, 0 };
+  Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
  return vaddq_s32(pset1<Packet4i>(a), countdown);
 }

@@ -191,14 +201,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)
 {
  float32x2_t lo, hi;
  lo = vdup_n_f32(*from);
-  hi = vdup_n_f32(*from);
+  hi = vdup_n_f32(*(from+1));
  return vcombine_f32(lo, hi);
 }
 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)
 {
  int32x2_t lo, hi;
  lo = vdup_n_s32(*from);
-  hi = vdup_n_s32(*from);
+  hi = vdup_n_s32(*(from+1));
  return vcombine_s32(lo, hi);
 }

@@ -395,25 +405,29 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
  return s[0];
 }

-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
-  EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
-  {
-    if (Offset!=0)
-      first = vextq_f32(first, second, Offset);
-  }
-};
+// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
+// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+    {\
+        if (Offset!=0)\
+            first = Command(first, second, Offset);\
+    }\
+};\

-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
-  EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
-  {
-    if (Offset!=0)
-      first = vextq_s32(first, second, Offset);
-  }
-};
+PALIGN_NEON(0,Packet4f,vextq_f32)
+PALIGN_NEON(1,Packet4f,vextq_f32)
+PALIGN_NEON(2,Packet4f,vextq_f32)
+PALIGN_NEON(3,Packet4f,vextq_f32)
+PALIGN_NEON(0,Packet4i,vextq_s32)
+PALIGN_NEON(1,Packet4i,vextq_s32)
+PALIGN_NEON(2,Packet4i,vextq_s32)
+PALIGN_NEON(3,Packet4i,vextq_s32)
+    
+#undef PALIGN_NEON

 } // end namespace internal

--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -110,9 +110,18 @@ template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4}
 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
 template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4}; };

+#if defined(_MSC_VER) && (_MSC_VER==1500)
+// Workaround MSVC 9 internal compiler error.
+// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
+// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
+#else
 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set1_ps(from); }
 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }
+#endif

 template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
 template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -81,6 +81,7 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
 template<typename LhsScalar, typename RhsScalar, int KcFactor>
 void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
 {
+  EIGEN_UNUSED_VARIABLE(n);
  // Explanations:
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
@@ -102,7 +103,6 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
  k = std::min<std::ptrdiff_t>(k, l1/kdiv);
  std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
  if(_m<m) m = _m & mr_mask;
-  n = n;
 }

 template<typename LhsScalar, typename RhsScalar>
@@ -118,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
  // FIXME (a bit overkill maybe ?)

  template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
-    EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
+    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
    {
      c = cj.pmadd(a,b,c);
    }
  };

  template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
-    EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
+    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
    {
      t = b; t = cj.pmul(a,t); c = padd(c,t);
    }
@@ -536,7 +536,7 @@ struct gebp_kernel
    ResPacketSize = Traits::ResPacketSize
  };

-  EIGEN_FLATTEN_ATTRIB
+  EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
  void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
  {
@@ -598,64 +598,64 @@ struct gebp_kernel
          if(nr==2)
          {
            LhsPacket A0, A1;
-            RhsPacket B0;
+            RhsPacket B_0;
            RhsPacket T0;
            
 EIGEN_ASM_COMMENT("mybegin2");
            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[1*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[1*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);

            traits.loadLhs(&blA[2*LhsProgress], A0);
            traits.loadLhs(&blA[3*LhsProgress], A1);
-            traits.loadRhs(&blB[2*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[3*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[2*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[3*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);

            traits.loadLhs(&blA[4*LhsProgress], A0);
            traits.loadLhs(&blA[5*LhsProgress], A1);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[5*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[5*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);

            traits.loadLhs(&blA[6*LhsProgress], A0);
            traits.loadLhs(&blA[7*LhsProgress], A1);
-            traits.loadRhs(&blB[6*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[7*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[6*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[7*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);
 EIGEN_ASM_COMMENT("myend");
          }
          else
          {
 EIGEN_ASM_COMMENT("mybegin4");
            LhsPacket A0, A1;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;
            RhsPacket T0;
            
            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
            traits.loadRhs(&blB[1*RhsProgress], B1);

-            traits.madd(A0,B0,C0,T0);
+            traits.madd(A0,B_0,C0,T0);
            traits.loadRhs(&blB[2*RhsProgress], B2);
-            traits.madd(A1,B0,C4,B0);
+            traits.madd(A1,B_0,C4,B_0);
            traits.loadRhs(&blB[3*RhsProgress], B3);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.loadRhs(&blB[5*RhsProgress], B1);
@@ -667,9 +667,9 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.madd(A1,B3,C7,B3);
            traits.loadLhs(&blA[3*LhsProgress], A1);
            traits.loadRhs(&blB[7*RhsProgress], B3);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[8*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[8*RhsProgress], B_0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.loadRhs(&blB[9*RhsProgress], B1);
@@ -682,9 +682,9 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.loadLhs(&blA[5*LhsProgress], A1);
            traits.loadRhs(&blB[11*RhsProgress], B3);

-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[12*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[12*RhsProgress], B_0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.loadRhs(&blB[13*RhsProgress], B1);
@@ -696,8 +696,8 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.madd(A1,B3,C7,B3);
            traits.loadLhs(&blA[7*LhsProgress], A1);
            traits.loadRhs(&blB[15*RhsProgress], B3);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.madd(A0,B2,C2,T0);
@@ -715,32 +715,32 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsPacket A0, A1;
-            RhsPacket B0;
+            RhsPacket B_0;
            RhsPacket T0;

            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[1*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[1*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);
          }
          else
          {
            LhsPacket A0, A1;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;
            RhsPacket T0;

            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
            traits.loadRhs(&blB[1*RhsProgress], B1);

-            traits.madd(A0,B0,C0,T0);
+            traits.madd(A0,B_0,C0,T0);
            traits.loadRhs(&blB[2*RhsProgress], B2);
-            traits.madd(A1,B0,C4,B0);
+            traits.madd(A1,B_0,C4,B_0);
            traits.loadRhs(&blB[3*RhsProgress], B3);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
@@ -827,42 +827,42 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsPacket A0;
-            RhsPacket B0, B1;
+            RhsPacket B_0, B1;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
            traits.loadRhs(&blB[1*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[2*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[2*RhsProgress], B_0);
            traits.madd(A0,B1,C1,B1);
            traits.loadLhs(&blA[1*LhsProgress], A0);
            traits.loadRhs(&blB[3*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
            traits.madd(A0,B1,C1,B1);
            traits.loadLhs(&blA[2*LhsProgress], A0);
            traits.loadRhs(&blB[5*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[6*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[6*RhsProgress], B_0);
            traits.madd(A0,B1,C1,B1);
            traits.loadLhs(&blA[3*LhsProgress], A0);
            traits.loadRhs(&blB[7*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
            traits.madd(A0,B1,C1,B1);
          }
          else
          {
            LhsPacket A0;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
            traits.loadRhs(&blB[1*RhsProgress], B1);

-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
            traits.loadRhs(&blB[2*RhsProgress], B2);
            traits.loadRhs(&blB[3*RhsProgress], B3);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
            traits.madd(A0,B1,C1,B1);
            traits.loadRhs(&blB[5*RhsProgress], B1);
            traits.madd(A0,B2,C2,B2);
@@ -870,8 +870,8 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.madd(A0,B3,C3,B3);
            traits.loadLhs(&blA[1*LhsProgress], A0);
            traits.loadRhs(&blB[7*RhsProgress], B3);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[8*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[8*RhsProgress], B_0);
            traits.madd(A0,B1,C1,B1);
            traits.loadRhs(&blB[9*RhsProgress], B1);
            traits.madd(A0,B2,C2,B2);
@@ -880,8 +880,8 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.loadLhs(&blA[2*LhsProgress], A0);
            traits.loadRhs(&blB[11*RhsProgress], B3);

-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[12*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[12*RhsProgress], B_0);
            traits.madd(A0,B1,C1,B1);
            traits.loadRhs(&blB[13*RhsProgress], B1);
            traits.madd(A0,B2,C2,B2);
@@ -890,7 +890,7 @@ EIGEN_ASM_COMMENT("mybegin4");

            traits.loadLhs(&blA[3*LhsProgress], A0);
            traits.loadRhs(&blB[15*RhsProgress], B3);
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
            traits.madd(A0,B1,C1,B1);
            traits.madd(A0,B2,C2,B2);
            traits.madd(A0,B3,C3,B3);
@@ -905,26 +905,26 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsPacket A0;
-            RhsPacket B0, B1;
+            RhsPacket B_0, B1;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
            traits.loadRhs(&blB[1*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
            traits.madd(A0,B1,C1,B1);
          }
          else
          {
            LhsPacket A0;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
            traits.loadRhs(&blB[1*RhsProgress], B1);
            traits.loadRhs(&blB[2*RhsProgress], B2);
            traits.loadRhs(&blB[3*RhsProgress], B3);

-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
            traits.madd(A0,B1,C1,B1);
            traits.madd(A0,B2,C2,B2);
            traits.madd(A0,B3,C3,B3);
@@ -971,26 +971,26 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsScalar A0;
-            RhsScalar B0, B1;
+            RhsScalar B_0, B1;

            A0 = blA[k];
-            B0 = blB[0];
+            B_0 = blB[0];
            B1 = blB[1];
-            MADD(cj,A0,B0,C0,B0);
+            MADD(cj,A0,B_0,C0,B_0);
            MADD(cj,A0,B1,C1,B1);
          }
          else
          {
            LhsScalar A0;
-            RhsScalar B0, B1, B2, B3;
+            RhsScalar B_0, B1, B2, B3;

            A0 = blA[k];
-            B0 = blB[0];
+            B_0 = blB[0];
            B1 = blB[1];
            B2 = blB[2];
            B3 = blB[3];

-            MADD(cj,A0,B0,C0,B0);
+            MADD(cj,A0,B_0,C0,B_0);
            MADD(cj,A0,B1,C1,B1);
            MADD(cj,A0,B2,C2,B2);
            MADD(cj,A0,B3,C3,B3);
@@ -1027,14 +1027,14 @@ EIGEN_ASM_COMMENT("mybegin4");
        for(Index k=0; k<depth; k++)
        {
          LhsPacket A0, A1;
-          RhsPacket B0;
+          RhsPacket B_0;
          RhsPacket T0;

          traits.loadLhs(&blA[0*LhsProgress], A0);
          traits.loadLhs(&blA[1*LhsProgress], A1);
-          traits.loadRhs(&blB[0*RhsProgress], B0);
-          traits.madd(A0,B0,C0,T0);
-          traits.madd(A1,B0,C4,B0);
+          traits.loadRhs(&blB[0*RhsProgress], B_0);
+          traits.madd(A0,B_0,C0,T0);
+          traits.madd(A1,B_0,C4,B_0);

          blB += RhsProgress;
          blA += 2*LhsProgress;
@@ -1066,10 +1066,10 @@ EIGEN_ASM_COMMENT("mybegin4");
        for(Index k=0; k<depth; k++)
        {
          LhsPacket A0;
-          RhsPacket B0;
+          RhsPacket B_0;
          traits.loadLhs(blA, A0);
-          traits.loadRhs(blB, B0);
-          traits.madd(A0, B0, C0, B0);
+          traits.loadRhs(blB, B_0);
+          traits.madd(A0, B_0, C0, B_0);
          blB += RhsProgress;
          blA += LhsProgress;
        }
@@ -1091,8 +1091,8 @@ EIGEN_ASM_COMMENT("mybegin4");
        for(Index k=0; k<depth; k++)
        {
          LhsScalar A0 = blA[k];
-          RhsScalar B0 = blB[k];
-          MADD(cj, A0, B0, C0, B0);
+          RhsScalar B_0 = blB[k];
+          MADD(cj, A0, B_0, C0, B_0);
        }
        res[(j2+0)*resStride + i] += alpha*C0;
      }
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -78,7 +78,7 @@ static void run(Index rows, Index cols, Index depth,
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;

  Index kc = blocking.kc();                 // cache block size along the K direction
-  Index mc = std::min(rows,blocking.mc());  // cache block size along the M direction
+  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
  //Index nc = blocking.nc(); // cache block size along the N direction

  gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -94,15 +94,16 @@ static void run(Index rows, Index cols, Index depth,
    
    std::size_t sizeA = kc*mc;
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, sizeA);
-    RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
+    
    RhsScalar* blockB = blocking.blockB();
    eigen_internal_assert(blockB!=0);

    // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
    for(Index k=0; k<depth; k+=kc)
    {
-      const Index actual_kc = std::min(k+kc,depth)-k; // => rows of B', and cols of the A'
+      const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'

      // In order to reduce the chance that a thread has to wait for the other,
      // let's start by packing A'.
@@ -139,7 +140,7 @@ static void run(Index rows, Index cols, Index depth,
      // Then keep going as usual with the remaining A'
      for(Index i=mc; i<rows; i+=mc)
      {
-        const Index actual_mc = std::min(i+mc,rows)-i;
+        const Index actual_mc = (std::min)(i+mc,rows)-i;

        // pack A_i,k to A'
        pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
@@ -154,9 +155,6 @@ static void run(Index rows, Index cols, Index depth,
        #pragma omp atomic
        --(info[j].users);
    }
-
-    ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
-    ei_aligned_stack_delete(RhsScalar, w, sizeW);
  }
  else
 #endif // EIGEN_HAS_OPENMP
@@ -167,15 +165,16 @@ static void run(Index rows, Index cols, Index depth,
    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*cols;
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
-    RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
-    RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
+
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());

    // For each horizontal panel of the rhs, and corresponding panel of the lhs...
    // (==GEMM_VAR1)
    for(Index k2=0; k2<depth; k2+=kc)
    {
-      const Index actual_kc = std::min(k2+kc,depth)-k2;
+      const Index actual_kc = (std::min)(k2+kc,depth)-k2;

      // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
      // => Pack rhs's panel into a sequential chunk of memory (L2 caching)
@@ -188,7 +187,7 @@ static void run(Index rows, Index cols, Index depth,
      // (==GEPP_VAR1)
      for(Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = std::min(i2+mc,rows)-i2;
+        const Index actual_mc = (std::min)(i2+mc,rows)-i2;

        // We pack the lhs's block into a sequential chunk of memory (L1 caching)
        // Note that this block will be read a very high number of times, which is equal to the number of
@@ -200,10 +199,6 @@ static void run(Index rows, Index cols, Index depth,

      }
    }
-
-    if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, sizeA);
-    if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
-    if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
  }
 }

--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -83,10 +83,10 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
    if(mc > Traits::nr)
      mc = (mc/Traits::nr)*Traits::nr;

-    LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*size;
-    RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
    RhsScalar* blockB = allocatedBlockB + sizeW;
    
    gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -96,14 +96,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,

    for(Index k2=0; k2<depth; k2+=kc)
    {
-      const Index actual_kc = std::min(k2+kc,depth)-k2;
+      const Index actual_kc = (std::min)(k2+kc,depth)-k2;

      // note that the actual rhs is the transpose/adjoint of mat
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);

      for(Index i2=0; i2<size; i2+=mc)
      {
-        const Index actual_mc = std::min(i2+mc,size)-i2;
+        const Index actual_mc = (std::min)(i2+mc,size)-i2;

        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

@@ -112,7 +112,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
        //  2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
        //  3 - after the diagonal => processed with gebp or skipped
        if (UpLo==Lower)
-          gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
+          gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
               -1, -1, 0, 0, allocatedBlockB);

        sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
@@ -120,13 +120,11 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
        if (UpLo==Upper)
        {
          Index j2 = i2+actual_mc;
-          gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
+          gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
               -1, -1, 0, 0, allocatedBlockB);
        }
      }
    }
-    ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
-    ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB);
  }
 };

--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -134,7 +134,7 @@ EIGEN_DONT_INLINE static void run(
    }
    else
    {
-      skipColumns = std::min(skipColumns,cols);
+      skipColumns = (std::min)(skipColumns,cols);
      // note that the skiped columns are processed later.
    }

@@ -386,7 +386,7 @@ EIGEN_DONT_INLINE static void run(
    }
    else
    {
-      skipRows = std::min(skipRows,Index(rows));
+      skipRows = (std::min)(skipRows,Index(rows));
      // note that the skiped columns are processed later.
    }
    eigen_internal_assert(  alignmentPattern==NoneAligned
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -114,7 +114,7 @@ struct symm_pack_rhs
    }

    // second part: diagonal block
-    for(Index j2=k2; j2<std::min(k2+rows,packet_cols); j2+=nr)
+    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically in three different parts (transpose, symmetric, normal)
      // transpose
@@ -179,7 +179,7 @@ struct symm_pack_rhs
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transpose
-      Index half = std::min(end_k,j2);
+      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = conj(rhs(j2,k));
@@ -261,12 +261,12 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must smaller than mc
-    kc = std::min(kc,mc);
+    kc = (std::min)(kc,mc);

-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -276,7 +276,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs

    for(Index k2=0; k2<size; k2+=kc)
    {
-      const Index actual_kc = std::min(k2+kc,size)-k2;
+      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
@@ -289,7 +289,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
-        const Index actual_mc = std::min(i2+mc,k2)-i2;
+        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

@@ -297,7 +297,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
      }
      // the block diagonal
      {
-        const Index actual_mc = std::min(k2+kc,size)-k2;
+        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

@@ -306,16 +306,13 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs

      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
-        const Index actual_mc = std::min(i2+mc,size)-i2;
+        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
-
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

@@ -343,11 +340,10 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
-
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -356,22 +352,19 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh

    for(Index k2=0; k2<size; k2+=kc)
    {
-      const Index actual_kc = std::min(k2+kc,size)-k2;
+      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = std::min(i2+mc,rows)-i2;
+        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
-
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -62,17 +62,15 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
  // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
  // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
  // this is because we need to extract packets
-  const Scalar* EIGEN_RESTRICT rhs = _rhs;
+  ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);  
  if (rhsIncr!=1)
  {
-    Scalar* r = ei_aligned_stack_new(Scalar, size);
    const Scalar* it = _rhs;
    for (Index i=0; i<size; ++i, it+=rhsIncr)
-      r[i] = *it;
-    rhs = r;
+      rhs[i] = *it;
  }

-  Index bound = std::max(Index(0),size-8) & 0xfffffffe;
+  Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
  if (FirstTriangular)
    bound = size - bound;

@@ -160,9 +158,6 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
    }
    res[j] += alpha * t2;
  }
-
-  if(rhsIncr!=1)
-    ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
 }

 } // end namespace internal 
@@ -211,40 +206,28 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
    
    internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
    internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
+
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  EvalToDest ? dest.data() : static_dest.data());
+                                                  
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
+        UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
    
-    bool freeDestPtr = false;
-    ResScalar* actualDestPtr;
-    if(EvalToDest)
-      actualDestPtr = dest.data();
-    else
+    if(!EvalToDest)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualDestPtr=static_dest.data())==0)
-      {
-        freeDestPtr = true;
-        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
-      }
      MappedDest(actualDestPtr, dest.size()) = dest;
    }
      
-    bool freeRhsPtr = false;
-    RhsScalar* actualRhsPtr;
-    if(UseRhs)
-      actualRhsPtr = const_cast<RhsScalar*>(rhs.data());
-    else
+    if(!UseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = rhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualRhsPtr=static_rhs.data())==0)
-      {
-        freeRhsPtr = true;
-        actualRhsPtr = ei_aligned_stack_new(RhsScalar,rhs.size());
-      }
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
    }
      
@@ -259,11 +242,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
      );
    
    if(!EvalToDest)
-    {
      dest = MappedDest(actualDestPtr, dest.size());
-      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
-    }
-    if(freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, rhs.size());
  }
 };

--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -81,27 +81,17 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
      UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
    };
    internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
-    
-    bool freeOtherPtr = false;
-    Scalar* actualOtherPtr;
-    if(UseOtherDirectly)
-      actualOtherPtr = const_cast<Scalar*>(actualOther.data());
-    else
-    {
-      if((actualOtherPtr=static_other.data())==0)
-      {
-        freeOtherPtr = true;
-        actualOtherPtr = ei_aligned_stack_new(Scalar,other.size());
-      }
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
+      (UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
+      
+    if(!UseOtherDirectly)
      Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
-    }
    
    selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
                              OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
                            (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
          ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
-    
-    if((!UseOtherDirectly) && freeOtherPtr) ei_aligned_stack_delete(Scalar, actualOtherPtr, other.size());
  }
 };

--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -96,33 +96,38 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
                                           LhsStorageOrder,ConjugateLhs,
                                           RhsStorageOrder,ConjugateRhs,ColMajor>
 {
+  
+  typedef gebp_traits<Scalar,Scalar> Traits;
+  enum {
+    SmallPanelWidth   = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+    IsLower = (Mode&Lower) == Lower,
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+  };

  static EIGEN_DONT_INLINE void run(
-    Index rows, Index cols, Index depth,
+    Index _rows, Index _cols, Index _depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    Scalar alpha)
  {
+    // strip zeros
+    Index diagSize  = (std::min)(_rows,_depth);
+    Index rows      = IsLower ? _rows : diagSize;
+    Index depth     = IsLower ? diagSize : _depth;
+    Index cols      = _cols;
+    
    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

-    typedef gebp_traits<Scalar,Scalar> Traits;
-    enum {
-      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
-      IsLower = (Mode&Lower) == Lower,
-      SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
-    };
-
    Index kc = depth; // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
-
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);    
    Scalar* blockB = allocatedBlockB + sizeW;

    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
@@ -140,7 +145,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
        IsLower ? k2>0 : k2<depth;
        IsLower ? k2-=kc : k2+=kc)
    {
-      Index actual_kc = std::min(IsLower ? k2 : depth-k2, kc);
+      Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
      Index actual_k2 = IsLower ? k2-actual_kc : k2;

      // align blocks with the end of the triangular part for trapezoidal lhs
@@ -153,10 +158,11 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
      pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);

      // the selected lhs's panel has to be split in three different parts:
-      //  1 - the part which is above the diagonal block => skip it
+      //  1 - the part which is zero => skip it
      //  2 - the diagonal block => special kernel
-      //  3 - the panel below the diagonal block => GEPP
-      // the block diagonal, if any
+      //  3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
+
+      // the block diagonal, if any:
      if(IsLower || actual_k2<rows)
      {
        // for each small vertical panels of lhs
@@ -194,13 +200,13 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
          }
        }
      }
-      // the part below the diagonal => GEPP
+      // the part below (lower case) or above (upper case) the diagonal => GEPP
      {
        Index start = IsLower ? k2 : 0;
-        Index end   = IsLower ? rows : std::min(actual_k2,rows);
+        Index end   = IsLower ? rows : (std::min)(actual_k2,rows);
        for(Index i2=start; i2<end; i2+=mc)
        {
-          const Index actual_mc = std::min(i2+mc,end)-i2;
+          const Index actual_mc = (std::min)(i2+mc,end)-i2;
          gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
            (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);

@@ -208,10 +214,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
        }
      }
    }
-
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
-//     delete[] allocatedBlockB;
  }
 };

@@ -223,33 +225,38 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                                           LhsStorageOrder,ConjugateLhs,
                                           RhsStorageOrder,ConjugateRhs,ColMajor>
 {
+  typedef gebp_traits<Scalar,Scalar> Traits;
+  enum {
+    SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+    IsLower = (Mode&Lower) == Lower,
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+  };

  static EIGEN_DONT_INLINE void run(
-    Index rows, Index cols, Index depth,
+    Index _rows, Index _cols, Index _depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    Scalar alpha)
  {
+    // strip zeros
+    Index diagSize  = (std::min)(_cols,_depth);
+    Index rows      = _rows;
+    Index depth     = IsLower ? _depth : diagSize;
+    Index cols      = IsLower ? diagSize : _cols;
+    
    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

-    typedef gebp_traits<Scalar,Scalar> Traits;
-    enum {
-      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
-      IsLower = (Mode&Lower) == Lower,
-      SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
-    };
-
    Index kc = depth; // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);

-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
@@ -268,7 +275,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
        IsLower ? k2<depth  : k2>0;
        IsLower ? k2+=kc   : k2-=kc)
    {
-      Index actual_kc = std::min(IsLower ? depth-k2 : k2, kc);
+      Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
      Index actual_k2 = IsLower ? k2 : k2-actual_kc;

      // align blocks with the end of the triangular part for trapezoidal rhs
@@ -279,7 +286,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
      }

      // remaining size
-      Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2;
+      Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
      // size of the triangular part
      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;

@@ -320,7 +327,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,

      for (Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = std::min(mc,rows-i2);
+        const Index actual_mc = (std::min)(mc,rows-i2);
        pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);

        // triangular kernel
@@ -347,9 +354,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                    -1, -1, 0, 0, allocatedBlockB);
      }
    }
-
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -36,15 +36,16 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  enum {
    IsLower = ((Mode&Lower)==Lower),
-    HasUnitDiag = (Mode & UnitDiag)==UnitDiag
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
  };
-  static EIGEN_DONT_INLINE  void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
+  static EIGEN_DONT_INLINE  void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
                                     const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
  {
-    EIGEN_UNUSED_VARIABLE(resIncr);
-    eigen_assert(resIncr==1);
-    
    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+    Index size = (std::min)(_rows,_cols);
+    Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
+    Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;

    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -57,20 +58,20 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
    typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
    ResMap res(_res,rows);

-    for (Index pi=0; pi<cols; pi+=PanelWidth)
+    for (Index pi=0; pi<size; pi+=PanelWidth)
    {
-      Index actualPanelWidth = std::min(PanelWidth, cols-pi);
+      Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
      for (Index k=0; k<actualPanelWidth; ++k)
      {
        Index i = pi + k;
-        Index s = IsLower ? (HasUnitDiag ? i+1 : i ) : pi;
+        Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
        Index r = IsLower ? actualPanelWidth-k : k+1;
-        if ((!HasUnitDiag) || (--r)>0)
+        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
          res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
        if (HasUnitDiag)
          res.coeffRef(i) += alpha * cjRhs.coeff(i);
      }
-      Index r = IsLower ? cols - pi - actualPanelWidth : pi;
+      Index r = IsLower ? rows - pi - actualPanelWidth : pi;
      if (r>0)
      {
        Index s = IsLower ? pi+actualPanelWidth : 0;
@@ -81,6 +82,14 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
            &res.coeffRef(s), resIncr, alpha);
      }
    }
+    if((!IsLower) && cols>size)
+    {
+      general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+          rows, cols-size,
+          &lhs.coeffRef(0,size), lhsStride,
+          &rhs.coeffRef(size), rhsIncr,
+          _res, resIncr, alpha);
+    }
  }
 };

@@ -90,15 +99,16 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  enum {
    IsLower = ((Mode&Lower)==Lower),
-    HasUnitDiag = (Mode & UnitDiag)==UnitDiag
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
  };
-  static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
+  static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
                  const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
  {
-    eigen_assert(rhsIncr==1);
-    EIGEN_UNUSED_VARIABLE(rhsIncr);
-    
    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+    Index diagSize = (std::min)(_rows,_cols);
+    Index rows = IsLower ? _rows : diagSize;
+    Index cols = IsLower ? diagSize : _cols;

    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -111,15 +121,15 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
    typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
    ResMap res(_res,rows,InnerStride<>(resIncr));
    
-    for (Index pi=0; pi<cols; pi+=PanelWidth)
+    for (Index pi=0; pi<diagSize; pi+=PanelWidth)
    {
-      Index actualPanelWidth = std::min(PanelWidth, cols-pi);
+      Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
      for (Index k=0; k<actualPanelWidth; ++k)
      {
        Index i = pi + k;
-        Index s = IsLower ? pi  : (HasUnitDiag ? i+1 : i);
+        Index s = IsLower ? pi  : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
        Index r = IsLower ? k+1 : actualPanelWidth-k;
-        if ((!HasUnitDiag) || (--r)>0)
+        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
          res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
        if (HasUnitDiag)
          res.coeffRef(i) += alpha * cjRhs.coeff(i);
@@ -135,6 +145,14 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
            &res.coeffRef(pi), resIncr, alpha);
      }
    }
+    if(IsLower && rows>diagSize)
+    {
+      general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+            rows-diagSize, cols,
+            &lhs.coeffRef(diagSize,0), lhsStride,
+            &rhs.coeffRef(0), rhsIncr,
+            &res.coeffRef(diagSize), resIncr, alpha);
+    }
  }
 };

@@ -185,8 +203,8 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
  template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
-    
-    typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
+
+    typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
    Transpose<Dest> dstT(dst);
    internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
      TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
@@ -235,23 +253,15 @@ template<> struct trmv_selector<ColMajor>
    
    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

-    ResScalar* actualDestPtr;
-    bool freeDestPtr = false;
-    if (evalToDest)
-    {
-      actualDestPtr = dest.data();
-    }
-    else
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  evalToDest ? dest.data() : static_dest.data());
+
+    if(!evalToDest)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualDestPtr = static_dest.data())==0)
-      {
-        freeDestPtr = true;
-        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
-      }
      if(!alphaIsCompatible)
      {
        MappedDest(actualDestPtr, dest.size()).setZero();
@@ -277,7 +287,6 @@ template<> struct trmv_selector<ColMajor>
        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
      else
        dest = MappedDest(actualDestPtr, dest.size());
-      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
    }
  }
 };
@@ -310,23 +319,15 @@ template<> struct trmv_selector<RowMajor>

    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

-    RhsScalar* actualRhsPtr;
-    bool freeRhsPtr = false;
-    if (DirectlyUseRhs)
-    {
-      actualRhsPtr = const_cast<RhsScalar*>(actualRhs.data());
-    }
-    else
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+
+    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualRhsPtr = static_rhs.data())==0)
-      {
-        freeRhsPtr = true;
-        actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
-      }
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }
    
@@ -340,8 +341,6 @@ template<> struct trmv_selector<RowMajor>
            actualRhsPtr,1,
            dest.data(),dest.innerStride(),
            actualAlpha);
-
-    if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
  }
 };

--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -70,38 +70,48 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);

-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;
+    Scalar* blockW = allocatedBlockB;

    conj_if<Conjugate> conj;
    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;

+    // the goal here is to subdivise the Rhs panels such that we keep some cache
+    // coherence when accessing the rhs elements
+    std::ptrdiff_t l1, l2;
+    manage_caching_sizes(GetAction, &l1, &l2);
+    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
+    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
+
    for(Index k2=IsLower ? 0 : size;
        IsLower ? k2<size : k2>0;
        IsLower ? k2+=kc : k2-=kc)
    {
-      const Index actual_kc = std::min(IsLower ? size-k2 : k2, kc);
+      const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);

      // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
      // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
      // A11 (the triangular part) and A21 the remaining rectangular part.
      // Then the high level algorithm is:
      //  - B = R1                    => general block copy (done during the next step)
-      //  - R1 = L1^-1 B              => tricky part
+      //  - R1 = A11^-1 B             => tricky part
      //  - update B from the new R1  => actually this has to be performed continuously during the above step
-      //  - R2 = L2 * B               => GEPP
+      //  - R2 -= A21 * B             => GEPP

-      // The tricky part: compute R1 = L1^-1 B while updating B from R1
-      // The idea is to split L1 into multiple small vertical panels.
-      // Each panel can be split into a small triangular part A1 which is processed without optimization,
-      // and the remaining small part A2 which is processed using gebp with appropriate block strides
+      // The tricky part: compute R1 = A11^-1 B while updating B from R1
+      // The idea is to split A11 into multiple small vertical panels.
+      // Each panel can be split into a small triangular part T1k which is processed without optimization,
+      // and the remaining small part T2k which is processed using gebp with appropriate block strides
+      for(Index j2=0; j2<cols; j2+=subcols)
      {
-        // for each small vertical panels of lhs
+        Index actual_cols = (std::min)(cols-j2,subcols);
+        // for each small vertical panels [T1k^T, T2k^T]^T of lhs
        for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
        {
          Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
@@ -114,7 +124,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
            Index rs = actualPanelWidth - k - 1; // remaining size

            Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
-            for (Index j=0; j<cols; ++j)
+            for (Index j=j2; j<j2+actual_cols; ++j)
            {
              if (TriStorageOrder==RowMajor)
              {
@@ -143,7 +153,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
          Index blockBOffset = IsLower ? k1 : lengthTarget;

          // update the respective rows of B from other
-          pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);
+          pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);

          // GEBP
          if (lengthTarget>0)
@@ -152,19 +162,19 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO

            pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);

-            gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
-                        actualPanelWidth, actual_kc, 0, blockBOffset);
+            gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
+                        actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
          }
        }
      }
-
-      // R2 = A2 * B => GEPP
+      
+      // R2 -= A21 * B => GEPP
      {
        Index start = IsLower ? k2+kc : 0;
        Index end   = IsLower ? size : k2-kc;
        for(Index i2=start; i2<end; i2+=mc)
        {
-          const Index actual_mc = std::min(mc,end-i2);
+          const Index actual_mc = (std::min)(mc,end-i2);
          if (actual_mc>0)
          {
            pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
@@ -174,9 +184,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
        }
      }
    }
-
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

@@ -209,10 +216,10 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
    Index nc = rows;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);

-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*size;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    conj_if<Conjugate> conj;
@@ -225,7 +232,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
        IsLower ? k2>0 : k2<size;
        IsLower ? k2-=kc : k2+=kc)
    {
-      const Index actual_kc = std::min(IsLower ? k2 : size-k2, kc);
+      const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
      Index actual_k2 = IsLower ? k2-actual_kc : k2 ;

      Index startPanel = IsLower ? 0 : k2+actual_kc;
@@ -254,7 +261,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage

      for(Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = std::min(mc,rows-i2);
+        const Index actual_mc = (std::min)(mc,rows-i2);

        // triangular solver kernel
        {
@@ -314,9 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
                      -1, -1, 0, 0, allocatedBlockB);
      }
    }
-
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

--- a/Eigen/src/Core/products/TriangularSolverVector.h
+++ b/Eigen/src/Core/products/TriangularSolverVector.h
@@ -60,7 +60,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
        IsLower ? pi<size : pi>0;
        IsLower ? pi+=PanelWidth : pi-=PanelWidth)
    {
-      Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
+      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);

      Index r = IsLower ? pi : size - pi; // remaining size
      if (r > 0)
@@ -114,7 +114,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
        IsLower ? pi<size : pi>0;
        IsLower ? pi+=PanelWidth : pi-=PanelWidth)
    {
-      Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
+      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
      Index startBlock = IsLower ? pi : pi-actualPanelWidth;
      Index endBlock = IsLower ? pi + actualPanelWidth : 0;

--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -161,23 +161,68 @@ const unsigned int HereditaryBits = RowMajorBit
                                  | EvalBeforeNestingBit
                                  | EvalBeforeAssigningBit;

-// Possible values for the Mode parameter of triangularView()
+/** \defgroup enums Enumerations
+  * \ingroup Core_Module
+  *
+  * Various enumerations used in %Eigen. Many of these are used as template parameters.
+  */
+
+/** \ingroup enums
+  * Enum containing possible values for the \p Mode parameter of 
+  * MatrixBase::selfadjointView() and MatrixBase::triangularView(). */
 enum {
-  Lower=0x1, Upper=0x2, UnitDiag=0x4, ZeroDiag=0x8,
-  UnitLower=UnitDiag|Lower, UnitUpper=UnitDiag|Upper,
-  StrictlyLower=ZeroDiag|Lower, StrictlyUpper=ZeroDiag|Upper,
-  SelfAdjoint=0x10};
+  /** View matrix as a lower triangular matrix. */
+  Lower=0x1,                      
+  /** View matrix as an upper triangular matrix. */
+  Upper=0x2,                      
+  /** %Matrix has ones on the diagonal; to be used in combination with #Lower or #Upper. */
+  UnitDiag=0x4, 
+  /** %Matrix has zeros on the diagonal; to be used in combination with #Lower or #Upper. */
+  ZeroDiag=0x8,
+  /** View matrix as a lower triangular matrix with ones on the diagonal. */
+  UnitLower=UnitDiag|Lower, 
+  /** View matrix as an upper triangular matrix with ones on the diagonal. */
+  UnitUpper=UnitDiag|Upper,
+  /** View matrix as a lower triangular matrix with zeros on the diagonal. */
+  StrictlyLower=ZeroDiag|Lower, 
+  /** View matrix as an upper triangular matrix with zeros on the diagonal. */
+  StrictlyUpper=ZeroDiag|Upper,
+  /** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */
+  SelfAdjoint=0x10
+};

-enum { Unaligned=0, Aligned=1 };
-enum { ConditionalJumpCost = 5 };
+/** \ingroup enums
+  * Enum for indicating whether an object is aligned or not. */
+enum { 
+  /** Object is not correctly aligned for vectorization. */
+  Unaligned=0, 
+  /** Object is aligned for vectorization. */
+  Aligned=1 
+};

+/** \ingroup enums
+ * Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
 // FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
 // TODO: find out what to do with that. Adapt the AlignedBox API ?
 enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };

-enum DirectionType { Vertical, Horizontal, BothDirections };
-enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct };
+/** \ingroup enums
+  * Enum containing possible values for the \p Direction parameter of
+  * Reverse, PartialReduxExpr and VectorwiseOp. */
+enum DirectionType { 
+  /** For Reverse, all columns are reversed; 
+    * for PartialReduxExpr and VectorwiseOp, act on columns. */
+  Vertical, 
+  /** For Reverse, all rows are reversed; 
+    * for PartialReduxExpr and VectorwiseOp, act on rows. */
+  Horizontal, 
+  /** For Reverse, both rows and columns are reversed; 
+    * not used for PartialReduxExpr and VectorwiseOp. */
+  BothDirections 
+};

+/** \internal \ingroup enums
+  * Enum to specify how to traverse the entries of a matrix. */
 enum {
  /** \internal Default traversal, no vectorization, no index-based access */
  DefaultTraversal,
@@ -196,14 +241,32 @@ enum {
  InvalidTraversal
 };

+/** \internal \ingroup enums
+  * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */
 enum {
+  /** \internal Do not unroll loops. */
  NoUnrolling,
+  /** \internal Unroll only the inner loop, but not the outer loop. */
  InnerUnrolling,
+  /** \internal Unroll both the inner and the outer loop. If there is only one loop, 
+    * because linear traversal is used, then unroll that loop. */
  CompleteUnrolling
 };

+/** \internal \ingroup enums
+  * Enum to specify whether to use the default (built-in) implementation or the specialization. */
 enum {
+  Specialized,
+  BuiltIn
+};
+
+/** \ingroup enums
+  * Enum containing possible values for the \p _Options template parameter of
+  * Matrix, Array and BandMatrix. */
+enum {
+  /** Storage order is column major (see \ref TopicStorageOrders). */
  ColMajor = 0,
+  /** Storage order is row major (see \ref TopicStorageOrders). */
  RowMajor = 0x1,  // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
  /** \internal Align the matrix itself if it is vectorizable fixed-size */
  AutoAlign = 0,
@@ -211,11 +274,13 @@ enum {
  DontAlign = 0x2
 };

-/** \brief Enum for specifying whether to apply or solve on the left or right. 
-  */
+/** \ingroup enums
+  * Enum for specifying whether to apply or solve on the left or right. */
 enum {
-  OnTheLeft = 1,  /**< \brief Apply transformation on the left. */
-  OnTheRight = 2  /**< \brief Apply transformation on the right. */
+  /** Apply transformation on the left. */
+  OnTheLeft = 1,  
+  /** Apply transformation on the right. */
+  OnTheRight = 2  
 };

 /* the following could as well be written:
@@ -239,53 +304,111 @@ namespace {
  EIGEN_UNUSED Default_t Default;
 }

+/** \internal \ingroup enums
+  * Used in AmbiVector. */
 enum {
  IsDense         = 0,
  IsSparse
 };

+/** \ingroup enums
+  * Used as template parameter in DenseCoeffBase and MapBase to indicate 
+  * which accessors should be provided. */
 enum AccessorLevels {
-  ReadOnlyAccessors, WriteAccessors, DirectAccessors, DirectWriteAccessors
+  /** Read-only access via a member function. */
+  ReadOnlyAccessors, 
+  /** Read/write access via member functions. */
+  WriteAccessors, 
+  /** Direct read-only access to the coefficients. */
+  DirectAccessors, 
+  /** Direct read/write access to the coefficients. */
+  DirectWriteAccessors
 };

+/** \ingroup enums
+  * Enum with options to give to various decompositions. */
 enum DecompositionOptions {
-  Pivoting            = 0x01, // LDLT,
-  NoPivoting          = 0x02, // LDLT,
-  ComputeFullU        = 0x04, // SVD,
-  ComputeThinU        = 0x08, // SVD,
-  ComputeFullV        = 0x10, // SVD,
-  ComputeThinV        = 0x20, // SVD,
-  EigenvaluesOnly     = 0x40, // all eigen solvers
-  ComputeEigenvectors = 0x80, // all eigen solvers
+  /** \internal Not used (meant for LDLT?). */
+  Pivoting            = 0x01, 
+  /** \internal Not used (meant for LDLT?). */
+  NoPivoting          = 0x02, 
+  /** Used in JacobiSVD to indicate that the square matrix U is to be computed. */
+  ComputeFullU        = 0x04,
+  /** Used in JacobiSVD to indicate that the thin matrix U is to be computed. */
+  ComputeThinU        = 0x08,
+  /** Used in JacobiSVD to indicate that the square matrix V is to be computed. */
+  ComputeFullV        = 0x10,
+  /** Used in JacobiSVD to indicate that the thin matrix V is to be computed. */
+  ComputeThinV        = 0x20,
+  /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
+    * that only the eigenvalues are to be computed and not the eigenvectors. */
+  EigenvaluesOnly     = 0x40,
+  /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
+    * that both the eigenvalues and the eigenvectors are to be computed. */
+  ComputeEigenvectors = 0x80,
+  /** \internal */
  EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
+  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+    * solve the generalized eigenproblem \f$ Ax = \lambda B x \f$. */
  Ax_lBx              = 0x100,
+  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+    * solve the generalized eigenproblem \f$ ABx = \lambda x \f$. */
  ABx_lx              = 0x200,
+  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+    * solve the generalized eigenproblem \f$ BAx = \lambda x \f$. */
  BAx_lx              = 0x400,
+  /** \internal */
  GenEigMask = Ax_lBx | ABx_lx | BAx_lx
 };

+/** \ingroup enums
+  * Possible values for the \p QRPreconditioner template parameter of JacobiSVD. */
 enum QRPreconditioners {
+  /** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */
  NoQRPreconditioner,
+  /** Use a QR decomposition without pivoting as the first step. */
  HouseholderQRPreconditioner,
+  /** Use a QR decomposition with column pivoting as the first step. */
  ColPivHouseholderQRPreconditioner,
+  /** Use a QR decomposition with full pivoting as the first step. */
  FullPivHouseholderQRPreconditioner
 };

-/** \brief Enum for reporting the status of a computation.
-  */
+#ifdef Success
+#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
+#endif
+
+/** \ingroups enums
+  * Enum for reporting the status of a computation. */
 enum ComputationInfo {
-  Success = 0,        /**< \brief Computation was successful. */
-  NumericalIssue = 1, /**< \brief The provided data did not satisfy the prerequisites. */
-  NoConvergence = 2   /**< \brief Iterative procedure did not converge. */
+  /** Computation was successful. */
+  Success = 0,        
+  /** The provided data did not satisfy the prerequisites. */
+  NumericalIssue = 1, 
+  /** Iterative procedure did not converge. */
+  NoConvergence = 2,
+  /** The inputs are invalid, or the algorithm has been properly called.
+    * When assertions are enabled, such errors trigger an assert. */
+  InvalidInput = 3
 };

+/** \ingroup enums
+  * Enum used to specify how a particular transformation is stored in a matrix.
+  * \sa Transform, Hyperplane::transform(). */
 enum TransformTraits {
+  /** Transformation is an isometry. */
  Isometry      = 0x1,
+  /** Transformation is an affine transformation stored as a (Dim+1)^2 matrix whose last row is 
+    * assumed to be [0 ... 0 1]. */
  Affine        = 0x2,
+  /** Transformation is an affine transformation stored as a (Dim) x (Dim+1) matrix. */
  AffineCompact = 0x10 | Affine,
+  /** Transformation is a general projective transformation stored as a (Dim+1)^2 matrix. */
  Projective    = 0x20
 };

+/** \internal \ingroup enums
+  * Enum used to choose between implementation depending on the computer architecture. */
 namespace Architecture
 {
  enum Type {
@@ -302,8 +425,12 @@ namespace Architecture
  };
 }

+/** \internal \ingroup enums
+  * Enum used as template parameter in GeneralProduct. */
 enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };

+/** \internal \ingroup enums
+  * Enum used in experimental parallel implementation. */
 enum Action {GetAction, SetAction};

 /** The type used to identify a dense storage. */
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -133,6 +133,7 @@ template<typename ExpressionType> class WithFormat;
 template<typename MatrixType> struct CommaInitializer;
 template<typename Derived> class ReturnByValue;
 template<typename ExpressionType> class ArrayWrapper;
+template<typename ExpressionType> class MatrixWrapper;

 namespace internal {
 template<typename DecompositionType, typename Rhs> struct solve_retval_base;
@@ -282,6 +283,8 @@ template<typename MatrixType,int Direction> class Homogeneous;
 // MatrixFunctions module
 template<typename Derived> struct MatrixExponentialReturnValue;
 template<typename Derived> class MatrixFunctionReturnValue;
+template<typename Derived> class MatrixSquareRootReturnValue;
+template<typename Derived> class MatrixLogarithmReturnValue;

 namespace internal {
 template <typename Scalar>
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -26,9 +26,9 @@
 #ifndef EIGEN_MACROS_H
 #define EIGEN_MACROS_H

-#define EIGEN_WORLD_VERSION 2
-#define EIGEN_MAJOR_VERSION 95
-#define EIGEN_MINOR_VERSION 0
+#define EIGEN_WORLD_VERSION 3
+#define EIGEN_MAJOR_VERSION 0
+#define EIGEN_MINOR_VERSION 91

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -45,7 +45,7 @@
  #define EIGEN_GNUC_AT_MOST(x,y) 0
 #endif

-#if EIGEN_GNUC_AT_MOST(4,3)
+#if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__)
  // see bug 89
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
 #else
@@ -130,31 +130,34 @@
 #define EIGEN_MAKESTRING2(a) #a
 #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)

-// EIGEN_ALWAYS_INLINE_ATTRIB should be use in the declaration of function
-// which should be inlined even in debug mode.
-// FIXME with the always_inline attribute,
-// gcc 3.4.x reports the following compilation error:
-//   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
-//    : function body not available
-#if EIGEN_GNUC_AT_LEAST(4,0)
-#define EIGEN_ALWAYS_INLINE_ATTRIB __attribute__((always_inline))
-#else
-#define EIGEN_ALWAYS_INLINE_ATTRIB
-#endif
-
 #if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
 #define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
 #else
 #define EIGEN_FLATTEN_ATTRIB
 #endif

-// EIGEN_FORCE_INLINE means "inline as much as possible"
+// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
+// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
+// but GCC is still doing fine with just inline.
 #if (defined _MSC_VER) || (defined __INTEL_COMPILER)
 #define EIGEN_STRONG_INLINE __forceinline
 #else
 #define EIGEN_STRONG_INLINE inline
 #endif

+// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
+// attribute to maximize inlining. This should only be used when really necessary: in particular,
+// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
+// FIXME with the always_inline attribute,
+// gcc 3.4.x reports the following compilation error:
+//   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
+//    : function body not available
+#if EIGEN_GNUC_AT_LEAST(4,0)
+#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
+#endif
+
 #if (defined __GNUC__)
 #define EIGEN_DONT_INLINE __attribute__((noinline))
 #elif (defined _MSC_VER)
@@ -231,12 +234,16 @@
 #define EIGEN_ONLY_USED_FOR_DEBUG(x)
 #endif

-#if (defined __GNUC__)
-#define EIGEN_DEPRECATED __attribute__((deprecated))
-#elif (defined _MSC_VER)
-#define EIGEN_DEPRECATED __declspec(deprecated)
+#ifndef EIGEN_NO_DEPRECATED_WARNING
+  #if (defined __GNUC__)
+    #define EIGEN_DEPRECATED __attribute__((deprecated))
+  #elif (defined _MSC_VER)
+    #define EIGEN_DEPRECATED __declspec(deprecated)
+  #else
+    #define EIGEN_DEPRECATED
+  #endif
 #else
-#define EIGEN_DEPRECATED
+  #define EIGEN_DEPRECATED
 #endif

 #if (defined __GNUC__)
@@ -249,7 +256,7 @@
 #define EIGEN_UNUSED_VARIABLE(var) (void)var;

 #if (defined __GNUC__)
-#define EIGEN_ASM_COMMENT(X)  asm("#"X)
+#define EIGEN_ASM_COMMENT(X)  asm("#" X)
 #else
 #define EIGEN_ASM_COMMENT(X)
 #endif
@@ -399,7 +406,7 @@
 #define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \
  template<typename OtherDerived> \
  EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> \
-  METHOD(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+  (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
  { \
    return CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived>(derived(), other.derived()); \
  }
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -82,6 +82,16 @@

 namespace internal {

+inline void throw_std_bad_alloc()
+{
+  #ifdef EIGEN_EXCEPTIONS
+    throw std::bad_alloc();
+  #else
+    std::size_t huge = -1;
+    new int[huge];
+  #endif
+}
+
 /*****************************************************************************
 *** Implementation of handmade aligned functions                           ***
 *****************************************************************************/
@@ -156,7 +166,7 @@ inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)

  if (ptr != 0)
  {
-    std::memcpy(newptr, ptr, std::min(size,old_size));
+    std::memcpy(newptr, ptr, (std::min)(size,old_size));
    aligned_free(ptr);
  }

@@ -192,7 +202,7 @@ inline void check_that_malloc_is_allowed()
 #endif

 /** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
-  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
+  * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
  */
 inline void* aligned_malloc(size_t size)
 {
@@ -213,10 +223,9 @@ inline void* aligned_malloc(size_t size)
    result = handmade_aligned_malloc(size);
  #endif

-  #ifdef EIGEN_EXCEPTIONS
-    if(result == 0)
-      throw std::bad_alloc();
-  #endif
+  if(!result && size)
+    throw_std_bad_alloc();
+
  return result;
 }

@@ -241,7 +250,7 @@ inline void aligned_free(void *ptr)
 /**
 * \internal
 * \brief Reallocates an aligned block of memory.
-* \throws std::bad_alloc if EIGEN_EXCEPTIONS are defined.
+* \throws std::bad_alloc on allocation failure
 **/
 inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
 {
@@ -269,10 +278,9 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
  result = handmade_aligned_realloc(ptr,new_size,old_size);
 #endif

-#ifdef EIGEN_EXCEPTIONS
-  if (result==0 && new_size!=0)
-    throw std::bad_alloc();
-#endif
+  if (!result && new_size)
+    throw_std_bad_alloc();
+
  return result;
 }

@@ -281,7 +289,7 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
 *****************************************************************************/

 /** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
-  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
+  * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
  */
 template<bool Align> inline void* conditional_aligned_malloc(size_t size)
 {
@@ -293,9 +301,8 @@ template<> inline void* conditional_aligned_malloc<false>(size_t size)
  check_that_malloc_is_allowed();

  void *result = std::malloc(size);
-  #ifdef EIGEN_EXCEPTIONS
-    if(!result) throw std::bad_alloc();
-  #endif
+  if(!result && size)
+    throw_std_bad_alloc();
  return result;
 }

@@ -347,18 +354,27 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
 *** Implementation of aligned new/delete-like functions                    ***
 *****************************************************************************/

+template<typename T>
+EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
+{
+  if(size > size_t(-1) / sizeof(T))
+    throw_std_bad_alloc();
+}
+
 /** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
-  * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
+  * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
  * The default constructor of T is called.
  */
 template<typename T> inline T* aligned_new(size_t size)
 {
+  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
  return construct_elements_of_array(result, size);
 }

 template<typename T, bool Align> inline T* conditional_aligned_new(size_t size)
 {
+  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  return construct_elements_of_array(result, size);
 }
@@ -383,6 +399,8 @@ template<typename T, bool Align> inline void conditional_aligned_delete(T *ptr,

 template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
 {
+  check_size_for_overflow<T>(new_size);
+  check_size_for_overflow<T>(old_size);
  if(new_size < old_size)
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
@@ -394,6 +412,7 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pt

 template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size)
 {
+  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
    construct_elements_of_array(result, size);
@@ -402,6 +421,8 @@ template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t s

 template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
 {
+  check_size_for_overflow<T>(new_size);
+  check_size_for_overflow<T>(old_size);
  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
@@ -462,42 +483,116 @@ inline static Index first_aligned(const Scalar* array, Index size)
  }
 }

+
+// std::copy is much slower than memcpy, so let's introduce a smart_copy which
+// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
+template<typename T, bool UseMemcpy> struct smart_copy_helper;
+
+template<typename T> void smart_copy(const T* start, const T* end, T* target)
+{
+  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+
+template<typename T> struct smart_copy_helper<T,true> {
+  inline static void run(const T* start, const T* end, T* target)
+  { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
+};
+
+template<typename T> struct smart_copy_helper<T,false> {
+  inline static void run(const T* start, const T* end, T* target)
+  { std::copy(start, end, target); }
+};
+
+
 } // end namespace internal

 /*****************************************************************************
 *** Implementation of runtime stack allocation (falling back to malloc)    ***
 *****************************************************************************/

-/** \internal
-  * Allocates an aligned buffer of SIZE bytes on the stack if SIZE is smaller than
-  * EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
-  * (currently, this is Linux only). Otherwise the memory is allocated on the heap.
-  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling
-  * ei_aligned_stack_free(PTR,SIZE).
-  * \code
-  * float * data = ei_aligned_stack_alloc(float,array.size());
-  * // ...
-  * ei_aligned_stack_free(data,float,array.size());
-  * \endcode
-  */
-#if (defined __linux__)
-  #define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
-                                    ? alloca(SIZE) \
-                                    : Eigen::internal::aligned_malloc(SIZE)
-  #define ei_aligned_stack_free(PTR,SIZE) if(SIZE>EIGEN_STACK_ALLOCATION_LIMIT) Eigen::internal::aligned_free(PTR)
-#elif defined(_MSC_VER)
-  #define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
-                                    ? _alloca(SIZE) \
-                                    : Eigen::internal::aligned_malloc(SIZE)
-  #define ei_aligned_stack_free(PTR,SIZE) if(SIZE>EIGEN_STACK_ALLOCATION_LIMIT) Eigen::internal::aligned_free(PTR)
-#else
-  #define ei_aligned_stack_alloc(SIZE) Eigen::internal::aligned_malloc(SIZE)
-  #define ei_aligned_stack_free(PTR,SIZE) Eigen::internal::aligned_free(PTR)
+// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
+// to the appropriate stack allocation function
+#ifndef EIGEN_ALLOCA
+  #if (defined __linux__)
+    #define EIGEN_ALLOCA alloca
+  #elif defined(_MSC_VER)
+    #define EIGEN_ALLOCA _alloca
+  #endif
 #endif

-#define ei_aligned_stack_new(TYPE,SIZE) Eigen::internal::construct_elements_of_array(reinterpret_cast<TYPE*>(ei_aligned_stack_alloc(sizeof(TYPE)*SIZE)), SIZE)
-#define ei_aligned_stack_delete(TYPE,PTR,SIZE) do {Eigen::internal::destruct_elements_of_array<TYPE>(PTR, SIZE); \
-                                                   ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)
+namespace internal {
+
+// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
+// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
+template<typename T> class aligned_stack_memory_handler
+{
+  public:
+    /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
+     * Note that \a ptr can be 0 regardless of the other parameters.
+     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
+     * In this case, the buffer elements will also be destructed when this handler will be destructed.
+     * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
+     **/
+    aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
+      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
+    {
+      if(NumTraits<T>::RequireInitialization && m_ptr)
+        Eigen::internal::construct_elements_of_array(m_ptr, size);
+    }
+    ~aligned_stack_memory_handler()
+    {
+      if(NumTraits<T>::RequireInitialization && m_ptr)
+        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
+      if(m_deallocate)
+        Eigen::internal::aligned_free(m_ptr);
+    }
+  protected:
+    T* m_ptr;
+    size_t m_size;
+    bool m_deallocate;
+};
+
+}
+
+/** \internal
+  * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
+  * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+  * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
+  * The allocated buffer is automatically deleted when exiting the scope of this declaration.
+  * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
+  * Here is an example:
+  * \code
+  * {
+  *   ei_declare_aligned_stack_constructed_variable(float,data,size,0);
+  *   // use data[0] to data[size-1]
+  * }
+  * \endcode
+  * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
+  */
+#ifdef EIGEN_ALLOCA
+
+  #ifdef __arm__
+    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16)
+  #else
+    #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
+  #endif
+
+  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
+               : reinterpret_cast<TYPE*>( \
+                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
+                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) );  \
+    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+
+#else
+
+  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE));    \
+    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
+    
+#endif


 /*****************************************************************************
@@ -612,12 +707,13 @@ public:

    size_type max_size() const throw()
    {
-        return std::numeric_limits<size_type>::max();
+        return (std::numeric_limits<size_type>::max)();
    }

-    pointer allocate( size_type num, const_pointer* hint = 0 )
+    pointer allocate( size_type num, const void* hint = 0 )
    {
-        static_cast<void>( hint ); // suppress unused variable warning
+        EIGEN_UNUSED_VARIABLE(hint);
+        internal::check_size_for_overflow<T>(num);
        return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
    }

@@ -852,7 +948,7 @@ inline int queryTopLevelCacheSize()
 {
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
-  return std::max(l2,l3);
+  return (std::max)(l2,l3);
 }

 } // end namespace internal
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -70,6 +70,7 @@
        YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
        THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
+        FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
        NUMERIC_TYPE_MUST_BE_REAL,
        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
@@ -95,7 +96,12 @@
        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
        YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
-        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS
+        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
+        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
+        YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
+        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
+        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE
      };
    };

@@ -195,4 +201,15 @@
      EIGEN_STATIC_ASSERT(internal::is_lvalue<Derived>::value, \
                          THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)

+#define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \
+      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Derived>::XprKind, ArrayXpr>::value), \
+                          THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)
+
+#define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \
+      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Derived1>::XprKind, \
+                                             typename internal::traits<Derived2>::XprKind \
+                                            >::value), \
+                          YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
+
+
 #endif // EIGEN_STATIC_ASSERT_H
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -125,10 +125,9 @@ class compute_matrix_flags
      aligned_bit =
      (
            ((Options&DontAlign)==0)
-        &&  packet_traits<Scalar>::Vectorizable
        && (
 #if EIGEN_ALIGN_STATICALLY
-             ((!is_dynamic_size_storage) && (((MaxCols*MaxRows) % packet_traits<Scalar>::size) == 0))
+             ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*sizeof(Scalar)) % 16) == 0))
 #else
             0
 #endif
--- a/Eigen/src/Eigen2Support/Cwise.h
+++ b/Eigen/src/Eigen2Support/Cwise.h
@@ -82,13 +82,17 @@ template<typename ExpressionType> class Cwise
    const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_quotient_op)
    operator/(const MatrixBase<OtherDerived> &other) const;

+    /** \deprecated ArrayBase::min() */
    template<typename OtherDerived>
    const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op)
-    min(const MatrixBase<OtherDerived> &other) const;
+    (min)(const MatrixBase<OtherDerived> &other) const
+    { return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op)(_expression(), other.derived()); }

+    /** \deprecated ArrayBase::max() */
    template<typename OtherDerived>
    const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op)
-    max(const MatrixBase<OtherDerived> &other) const;
+    (max)(const MatrixBase<OtherDerived> &other) const
+    { return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op)(_expression(), other.derived()); }

    const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs_op)      abs() const;
    const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs2_op)     abs2() const;
--- a/Eigen/src/Eigen2Support/CwiseOperators.h
+++ b/Eigen/src/Eigen2Support/CwiseOperators.h
@@ -96,24 +96,6 @@ inline ExpressionType& Cwise<ExpressionType>::operator/=(const MatrixBase<OtherD
  return m_matrix.const_cast_derived() = *this / other;
 }

-/** \deprecated ArrayBase::min() */
-template<typename ExpressionType>
-template<typename OtherDerived>
-EIGEN_STRONG_INLINE const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op)
-Cwise<ExpressionType>::min(const MatrixBase<OtherDerived> &other) const
-{
-  return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op)(_expression(), other.derived());
-}
-
-/** \deprecated ArrayBase::max() */
-template<typename ExpressionType>
-template<typename OtherDerived>
-EIGEN_STRONG_INLINE const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op)
-Cwise<ExpressionType>::max(const MatrixBase<OtherDerived> &other) const
-{
-  return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op)(_expression(), other.derived());
-}
-
 /***************************************************************************
 * The following functions were defined in Array
 ***************************************************************************/
--- a/Eigen/src/Eigen2Support/Geometry/AlignedBox.h
+++ b/Eigen/src/Eigen2Support/Geometry/AlignedBox.h
@@ -71,18 +71,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==
  /** Makes \c *this a null/empty box. */
  inline void setNull()
  {
-    m_min.setConstant( std::numeric_limits<Scalar>::max());
-    m_max.setConstant(-std::numeric_limits<Scalar>::max());
+    m_min.setConstant( (std::numeric_limits<Scalar>::max)());
+    m_max.setConstant(-(std::numeric_limits<Scalar>::max)());
  }

  /** \returns the minimal corner */
-  inline const VectorType& min() const { return m_min; }
+  inline const VectorType& (min)() const { return m_min; }
  /** \returns a non const reference to the minimal corner */
-  inline VectorType& min() { return m_min; }
+  inline VectorType& (min)() { return m_min; }
  /** \returns the maximal corner */
-  inline const VectorType& max() const { return m_max; }
+  inline const VectorType& (max)() const { return m_max; }
  /** \returns a non const reference to the maximal corner */
-  inline VectorType& max() { return m_max; }
+  inline VectorType& (max)() { return m_max; }

  /** \returns true if the point \a p is inside the box \c *this. */
  inline bool contains(const VectorType& p) const
@@ -90,19 +90,19 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==

  /** \returns true if the box \a b is entirely inside the box \c *this. */
  inline bool contains(const AlignedBox& b) const
-  { return (m_min.cwise()<=b.min()).all() && (b.max().cwise()<=m_max).all(); }
+  { return (m_min.cwise()<=(b.min)()).all() && ((b.max)().cwise()<=m_max).all(); }

  /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. */
  inline AlignedBox& extend(const VectorType& p)
-  { m_min = m_min.cwise().min(p); m_max = m_max.cwise().max(p); return *this; }
+  { m_min = (m_min.cwise().min)(p); m_max = (m_max.cwise().max)(p); return *this; }

  /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. */
  inline AlignedBox& extend(const AlignedBox& b)
-  { m_min = m_min.cwise().min(b.m_min); m_max = m_max.cwise().max(b.m_max); return *this; }
+  { m_min = (m_min.cwise().min)(b.m_min); m_max = (m_max.cwise().max)(b.m_max); return *this; }

  /** Clamps \c *this by the box \a b and returns a reference to \c *this. */
  inline AlignedBox& clamp(const AlignedBox& b)
-  { m_min = m_min.cwise().max(b.m_min); m_max = m_max.cwise().min(b.m_max); return *this; }
+  { m_min = (m_min.cwise().max)(b.m_min); m_max = (m_max.cwise().min)(b.m_max); return *this; }

  /** Translate \c *this by the vector \a t and returns a reference to \c *this. */
  inline AlignedBox& translate(const VectorType& t)
@@ -138,8 +138,8 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==
  template<typename OtherScalarType>
  inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
  {
-    m_min = other.min().template cast<Scalar>();
-    m_max = other.max().template cast<Scalar>();
+    m_min = (other.min)().template cast<Scalar>();
+    m_max = (other.max)().template cast<Scalar>();
  }

  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
--- a/Eigen/src/Eigen2Support/Geometry/All.h
+++ b/Eigen/src/Eigen2Support/Geometry/All.h
@@ -112,4 +112,4 @@
 #undef Hyperplane
 #undef ParametrizedLine

-#endif // EIGEN2_GEOMETRY_MODULE_H
+#endif // EIGEN2_GEOMETRY_MODULE_H
--- a/Eigen/src/Eigen2Support/SVD.h
+++ b/Eigen/src/Eigen2Support/SVD.h
@@ -64,9 +64,9 @@ template<typename MatrixType> class SVD
    SVD() {} // a user who relied on compiler-generated default compiler reported problems with MSVC in 2.0.7
    
    SVD(const MatrixType& matrix)
-      : m_matU(matrix.rows(), std::min(matrix.rows(), matrix.cols())),
+      : m_matU(matrix.rows(), (std::min)(matrix.rows(), matrix.cols())),
        m_matV(matrix.cols(),matrix.cols()),
-        m_sigma(std::min(matrix.rows(),matrix.cols()))
+        m_sigma((std::min)(matrix.rows(),matrix.cols()))
    {
      compute(matrix);
    }
@@ -108,13 +108,13 @@ void SVD<MatrixType>::compute(const MatrixType& matrix)
 {
  const int m = matrix.rows();
  const int n = matrix.cols();
-  const int nu = std::min(m,n);
+  const int nu = (std::min)(m,n);
  ei_assert(m>=n && "In Eigen 2.0, SVD only works for MxN matrices with M>=N. Sorry!");
  ei_assert(m>1 && "In Eigen 2.0, SVD doesn't work on 1x1 matrices");

  m_matU.resize(m, nu);
  m_matU.setZero();
-  m_sigma.resize(std::min(m,n));
+  m_sigma.resize((std::min)(m,n));
  m_matV.resize(n,n);

  RowVector e(n);
@@ -126,9 +126,9 @@ void SVD<MatrixType>::compute(const MatrixType& matrix)

  // Reduce A to bidiagonal form, storing the diagonal elements
  // in s and the super-diagonal elements in e.
-  int nct = std::min(m-1,n);
-  int nrt = std::max(0,std::min(n-2,m));
-  for (k = 0; k < std::max(nct,nrt); ++k)
+  int nct = (std::min)(m-1,n);
+  int nrt = (std::max)(0,(std::min)(n-2,m));
+  for (k = 0; k < (std::max)(nct,nrt); ++k)
  {
    if (k < nct)
    {
@@ -193,7 +193,7 @@ void SVD<MatrixType>::compute(const MatrixType& matrix)


  // Set up the final bidiagonal matrix or order p.
-  int p = std::min(n,m+1);
+  int p = (std::min)(n,m+1);
  if (nct < n)
    m_sigma[nct] = matA(nct,nct);
  if (m < p)
@@ -380,7 +380,7 @@ void SVD<MatrixType>::compute(const MatrixType& matrix)
      case 3:
      {
        // Calculate the shift.
-        Scalar scale = std::max(std::max(std::max(std::max(
+        Scalar scale = (std::max)((std::max)((std::max)((std::max)(
                        ei_abs(m_sigma[p-1]),ei_abs(m_sigma[p-2])),ei_abs(e[p-2])),
                        ei_abs(m_sigma[k])),ei_abs(e[k]));
        Scalar sp = m_sigma[p-1]/scale;
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -227,46 +227,6 @@ template<typename _MatrixType> class ComplexSchur
    friend struct internal::complex_schur_reduce_to_hessenberg<MatrixType, NumTraits<Scalar>::IsComplex>;
 };

-namespace internal {
-
-/** Computes the principal value of the square root of the complex \a z. */
-template<typename RealScalar>
-std::complex<RealScalar> sqrt(const std::complex<RealScalar> &z)
-{
-  RealScalar t, tre, tim;
-
-  t = abs(z);
-
-  if (abs(real(z)) <= abs(imag(z)))
-  {
-    // No cancellation in these formulas
-    tre = sqrt(RealScalar(0.5)*(t + real(z)));
-    tim = sqrt(RealScalar(0.5)*(t - real(z)));
-  }
-  else
-  {
-    // Stable computation of the above formulas
-    if (z.real() > RealScalar(0))
-    {
-      tre = t + z.real();
-      tim = abs(imag(z))*sqrt(RealScalar(0.5)/tre);
-      tre = sqrt(RealScalar(0.5)*tre);
-    }
-    else
-    {
-      tim = t - z.real();
-      tre = abs(imag(z))*sqrt(RealScalar(0.5)/tim);
-      tim = sqrt(RealScalar(0.5)*tim);
-    }
-  }
-  if(z.imag() < RealScalar(0))
-    tim = -tim;
-
-  return (std::complex<RealScalar>(tre,tim));
-}
-} // end namespace internal
-
-
 /** If m_matT(i+1,i) is neglegible in floating point arithmetic
  * compared to m_matT(i,i) and m_matT(j,j), then set it to zero and
  * return true, else return false. */
@@ -302,7 +262,7 @@ typename ComplexSchur<MatrixType>::ComplexScalar ComplexSchur<MatrixType>::compu

  ComplexScalar b = t.coeff(0,1) * t.coeff(1,0);
  ComplexScalar c = t.coeff(0,0) - t.coeff(1,1);
-  ComplexScalar disc = internal::sqrt(c*c + RealScalar(4)*b);
+  ComplexScalar disc = sqrt(c*c + RealScalar(4)*b);
  ComplexScalar det = t.coeff(0,0) * t.coeff(1,1) - b;
  ComplexScalar trace = t.coeff(0,0) + t.coeff(1,1);
  ComplexScalar eival1 = (trace + disc) / RealScalar(2);
@@ -423,7 +383,7 @@ void ComplexSchur<MatrixType>::reduceToTriangularForm(bool computeU)
    JacobiRotation<ComplexScalar> rot;
    rot.makeGivens(m_matT.coeff(il,il) - shift, m_matT.coeff(il+1,il));
    m_matT.rightCols(m_matT.cols()-il).applyOnTheLeft(il, il+1, rot.adjoint());
-    m_matT.topRows(std::min(il+2,iu)+1).applyOnTheRight(il, il+1, rot);
+    m_matT.topRows((std::min)(il+2,iu)+1).applyOnTheRight(il, il+1, rot);
    if(computeU) m_matU.applyOnTheRight(il, il+1, rot);

    for(Index i=il+1 ; i<iu ; i++)
@@ -431,7 +391,7 @@ void ComplexSchur<MatrixType>::reduceToTriangularForm(bool computeU)
      rot.makeGivens(m_matT.coeffRef(i,i-1), m_matT.coeffRef(i+1,i-1), &m_matT.coeffRef(i,i-1));
      m_matT.coeffRef(i+1,i-1) = ComplexScalar(0);
      m_matT.rightCols(m_matT.cols()-i).applyOnTheLeft(i, i+1, rot.adjoint());
-      m_matT.topRows(std::min(i+2,iu)+1).applyOnTheRight(i, i+1, rot);
+      m_matT.topRows((std::min)(i+2,iu)+1).applyOnTheRight(i, i+1, rot);
      if(computeU) m_matU.applyOnTheRight(i, i+1, rot);
    }
  }
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -343,6 +343,7 @@ typename EigenSolver<MatrixType>::EigenvectorsType EigenSolver<MatrixType>::eige
    {
      // we have a real eigen value
      matV.col(j) = m_eivec.col(j).template cast<ComplexScalar>();
+      matV.col(j).normalize();
    }
    else
    {
@@ -434,7 +435,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
  Scalar norm = 0.0;
  for (Index j = 0; j < size; ++j)
  {
-    norm += m_matT.row(j).segment(std::max(j-1,Index(0)), size-std::max(j-1,Index(0))).cwiseAbs().sum();
+    norm += m_matT.row(j).segment((std::max)(j-1,Index(0)), size-(std::max)(j-1,Index(0))).cwiseAbs().sum();
  }
  
  // Backsubstitute to find vectors of upper triangular form
@@ -449,7 +450,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
    Scalar q = m_eivalues.coeff(n).imag();

    // Scalar vector
-    if (q == 0)
+    if (q == Scalar(0))
    {
      Scalar lastr=0, lastw=0;
      Index l = n;
@@ -490,12 +491,12 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()

          // Overflow control
          Scalar t = internal::abs(m_matT.coeff(i,n));
-          if ((eps * t) * t > 1)
+          if ((eps * t) * t > Scalar(1))
            m_matT.col(n).tail(size-i) /= t;
        }
      }
    }
-    else if (q < 0) // Complex vector
+    else if (q < Scalar(0) && n > 0) // Complex vector
    {
      Scalar lastra=0, lastsa=0, lastw=0;
      Index l = n-1;
@@ -529,7 +530,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
        else
        {
          l = i;
-          if (m_eivalues.coeff(i).imag() == 0)
+          if (m_eivalues.coeff(i).imag() == RealScalar(0))
          {
            std::complex<Scalar> cc = cdiv(-ra,-sa,w,q);
            m_matT.coeffRef(i,n-1) = internal::real(cc);
@@ -562,13 +563,18 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
          }

          // Overflow control
-          Scalar t = std::max(internal::abs(m_matT.coeff(i,n-1)),internal::abs(m_matT.coeff(i,n)));
-          if ((eps * t) * t > 1)
+          using std::max;
+          Scalar t = (max)(internal::abs(m_matT.coeff(i,n-1)),internal::abs(m_matT.coeff(i,n)));
+          if ((eps * t) * t > Scalar(1))
            m_matT.block(i, n-1, size-i, 2) /= t;

        }
      }
    }
+    else
+    {
+      eigen_assert("Internal bug in EigenSolver"); // this should not happen
+    }
  }

  // Back transformation to get eigenvectors of original matrix
--- a/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
@@ -98,8 +98,8 @@ class GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver<_MatrixT
      *                   Only the lower triangular part of the matrix is referenced.
      * \param[in]  matB  Positive-definite matrix in matrix pencil.
      *                   Only the lower triangular part of the matrix is referenced.
-      * \param[in]  options A or-ed set of flags {ComputeEigenvectors,EigenvaluesOnly} | {Ax_lBx,ABx_lx,BAx_lx}.
-      *                     Default is ComputeEigenvectors|Ax_lBx.
+      * \param[in]  options A or-ed set of flags {#ComputeEigenvectors,#EigenvaluesOnly} | {#Ax_lBx,#ABx_lx,#BAx_lx}.
+      *                     Default is #ComputeEigenvectors|#Ax_lBx.
      *
      * This constructor calls compute(const MatrixType&, const MatrixType&, int)
      * to compute the eigenvalues and (if requested) the eigenvectors of the
@@ -131,8 +131,8 @@ class GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver<_MatrixT
      *                   Only the lower triangular part of the matrix is referenced.
      * \param[in]  matB  Positive-definite matrix in matrix pencil.
      *                   Only the lower triangular part of the matrix is referenced.
-      * \param[in]  options A or-ed set of flags {ComputeEigenvectors,EigenvaluesOnly} | {Ax_lBx,ABx_lx,BAx_lx}.
-      *                     Default is ComputeEigenvectors|Ax_lBx.
+      * \param[in]  options A or-ed set of flags {#ComputeEigenvectors,#EigenvaluesOnly} | {#Ax_lBx,#ABx_lx,#BAx_lx}.
+      *                     Default is #ComputeEigenvectors|#Ax_lBx.
      *
      * \returns    Reference to \c *this
      *
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -290,7 +290,7 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
  //               + m_matT.bottomLeftCorner(size-1,size-1).diagonal().cwiseAbs().sum();
  Scalar norm = 0.0;
  for (Index j = 0; j < size; ++j)
-    norm += m_matT.row(j).segment(std::max(j-1,Index(0)), size-std::max(j-1,Index(0))).cwiseAbs().sum();
+    norm += m_matT.row(j).segment((std::max)(j-1,Index(0)), size-(std::max)(j-1,Index(0))).cwiseAbs().sum();
  return norm;
 }

@@ -324,11 +324,11 @@ inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, Scal
  m_matT.coeffRef(iu,iu) += exshift;
  m_matT.coeffRef(iu-1,iu-1) += exshift;

-  if (q >= 0) // Two real eigenvalues
+  if (q >= Scalar(0)) // Two real eigenvalues
  {
    Scalar z = internal::sqrt(internal::abs(q));
    JacobiRotation<Scalar> rot;
-    if (p >= 0)
+    if (p >= Scalar(0))
      rot.makeGivens(p + z, m_matT.coeff(iu, iu-1));
    else
      rot.makeGivens(p - z, m_matT.coeff(iu, iu-1));
@@ -369,7 +369,7 @@ inline void RealSchur<MatrixType>::computeShift(Index iu, Index iter, Scalar& ex
  {
    Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
    s = s * s + shiftInfo.coeff(2);
-    if (s > 0)
+    if (s > Scalar(0))
    {
      s = internal::sqrt(s);
      if (shiftInfo.coeff(1) < shiftInfo.coeff(0))
@@ -442,7 +442,7 @@ inline void RealSchur<MatrixType>::performFrancisQRStep(Index il, Index im, Inde

      // These Householder transformations form the O(n^3) part of the algorithm
      m_matT.block(k, k, 3, size-k).applyHouseholderOnTheLeft(ess, tau, workspace);
-      m_matT.block(0, k, std::min(iu,k+3) + 1, 3).applyHouseholderOnTheRight(ess, tau, workspace);
+      m_matT.block(0, k, (std::min)(iu,k+3) + 1, 3).applyHouseholderOnTheRight(ess, tau, workspace);
      if (computeU)
        m_matU.block(0, k, size, 3).applyHouseholderOnTheRight(ess, tau, workspace);
    }
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -32,6 +32,10 @@
 template<typename _MatrixType>
 class GeneralizedSelfAdjointEigenSolver;

+namespace internal {
+template<typename SolverType,int Size,bool IsComplex> struct direct_selfadjoint_eigenvalues;
+}
+
 /** \eigenvalues_module \ingroup Eigenvalues_Module
  *
  *
@@ -86,7 +90,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      Options = MatrixType::Options,
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
-
+    
    /** \brief Scalar type for matrices of type \p _MatrixType. */
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::Index Index;
@@ -98,6 +102,8 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      * complex.
      */
    typedef typename NumTraits<Scalar>::Real RealScalar;
+    
+    friend struct internal::direct_selfadjoint_eigenvalues<SelfAdjointEigenSolver,Size,NumTraits<Scalar>::IsComplex>;

    /** \brief Type for vector of eigenvalues as returned by eigenvalues().
      *
@@ -147,11 +153,11 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      *
      * \param[in]  matrix  Selfadjoint matrix whose eigendecomposition is to
      *    be computed. Only the lower triangular part of the matrix is referenced.
-      * \param[in]  options Can be ComputeEigenvectors (default) or EigenvaluesOnly.
+      * \param[in]  options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.
      *
      * This constructor calls compute(const MatrixType&, int) to compute the
      * eigenvalues of the matrix \p matrix. The eigenvectors are computed if
-      * \p options equals ComputeEigenvectors.
+      * \p options equals #ComputeEigenvectors.
      *
      * Example: \include SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.cpp
      * Output: \verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.out
@@ -171,11 +177,11 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      *
      * \param[in]  matrix  Selfadjoint matrix whose eigendecomposition is to
      *    be computed. Only the lower triangular part of the matrix is referenced.
-      * \param[in]  options Can be ComputeEigenvectors (default) or EigenvaluesOnly.
+      * \param[in]  options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.
      * \returns    Reference to \c *this
      *
      * This function computes the eigenvalues of \p matrix.  The eigenvalues()
-      * function can be used to retrieve them.  If \p options equals ComputeEigenvectors,
+      * function can be used to retrieve them.  If \p options equals #ComputeEigenvectors,
      * then the eigenvectors are also computed and can be retrieved by
      * calling eigenvectors().
      *
@@ -198,6 +204,22 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
      * \sa SelfAdjointEigenSolver(const MatrixType&, int)
      */
    SelfAdjointEigenSolver& compute(const MatrixType& matrix, int options = ComputeEigenvectors);
+    
+    /** \brief Computes eigendecomposition of given matrix using a direct algorithm
+      *
+      * This is a variant of compute(const MatrixType&, int options) which
+      * directly solves the underlying polynomial equation.
+      * 
+      * Currently only 3x3 matrices for which the sizes are known at compile time are supported (e.g., Matrix3d).
+      * 
+      * This method is usually significantly faster than the QR algorithm
+      * but it might also be less accurate. It is also worth noting that
+      * for 3x3 matrices it involves trigonometric operations which are
+      * not necessarily available for all scalar types.
+      *
+      * \sa compute(const MatrixType&, int options)
+      */
+    SelfAdjointEigenSolver& computeDirect(const MatrixType& matrix, int options = ComputeEigenvectors);

    /** \brief Returns the eigenvectors of given matrix.
      *
@@ -220,6 +242,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
    const MatrixType& eigenvectors() const
    {
      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      eigen_assert(info() == Success && "Eigenvalue computation did not converge.");
      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
      return m_eivec;
    }
@@ -242,6 +265,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
    const RealVectorType& eigenvalues() const
    {
      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      eigen_assert(info() == Success && "Eigenvalue computation did not converge.");
      return m_eivalues;
    }

@@ -266,6 +290,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
    MatrixType operatorSqrt() const
    {
      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      eigen_assert(info() == Success && "Eigenvalue computation did not converge.");
      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
      return m_eivec * m_eivalues.cwiseSqrt().asDiagonal() * m_eivec.adjoint();
    }
@@ -291,6 +316,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
    MatrixType operatorInverseSqrt() const
    {
      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      eigen_assert(info() == Success && "Eigenvalue computation did not converge.");
      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
      return m_eivec * m_eivalues.cwiseInverse().cwiseSqrt().asDiagonal() * m_eivec.adjoint();
    }
@@ -307,7 +333,8 @@ template<typename _MatrixType> class SelfAdjointEigenSolver

    /** \brief Maximum number of iterations.
      *
-      * Maximum number of iterations allowed for an eigenvalue to converge.
+      * The algorithm terminates if it does not converge within m_maxIterations * n iterations, where n
+      * denotes the size of the matrix. This value is currently set to 30 (copied from LAPACK).
      */
    static const int m_maxIterations = 30;

@@ -387,7 +414,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
  {
    m_eivalues.coeffRef(0,0) = internal::real(matrix.coeff(0,0));
    if(computeEigenvectors)
-      m_eivec.setOnes();
+      m_eivec.setOnes(n,n);
    m_info = Success;
    m_isInitialized = true;
    m_eigenvectorsOk = computeEigenvectors;
@@ -407,7 +434,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
  
  Index end = n-1;
  Index start = 0;
-  Index iter = 0; // number of iterations we are working on one element
+  Index iter = 0; // total number of iterations

  while (end>0)
  {
@@ -418,15 +445,14 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
    // find the largest unreduced block
    while (end>0 && m_subdiag[end-1]==0)
    {
-      iter = 0;
      end--;
    }
    if (end<=0)
      break;

-    // if we spent too many iterations on the current element, we give up
+    // if we spent too many iterations, we give up
    iter++;
-    if(iter > m_maxIterations) break;
+    if(iter > m_maxIterations * n) break;

    start = end - 1;
    while (start>0 && m_subdiag[start-1]!=0)
@@ -435,7 +461,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
    internal::tridiagonal_qr_step<MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor>(diag.data(), m_subdiag.data(), start, end, computeEigenvectors ? m_eivec.data() : (Scalar*)0, n);
  }

-  if (iter <= m_maxIterations)
+  if (iter <= m_maxIterations * n)
    m_info = Success;
  else
    m_info = NoConvergence;
@@ -466,6 +492,264 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
  return *this;
 }

+
+namespace internal {
+  
+template<typename SolverType,int Size,bool IsComplex> struct direct_selfadjoint_eigenvalues
+{
+  inline static void run(SolverType& eig, const typename SolverType::MatrixType& A, int options)
+  { eig.compute(A,options); }
+};
+
+template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3,false>
+{
+  typedef typename SolverType::MatrixType MatrixType;
+  typedef typename SolverType::RealVectorType VectorType;
+  typedef typename SolverType::Scalar Scalar;
+  
+  inline static void computeRoots(const MatrixType& m, VectorType& roots)
+  {
+    using std::sqrt;
+    using std::atan2;
+    using std::cos;
+    using std::sin;
+    const Scalar s_inv3 = 1.0/3.0;
+    const Scalar s_sqrt3 = sqrt(Scalar(3.0));
+
+    // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0.  The
+    // eigenvalues are the roots to this equation, all guaranteed to be
+    // real-valued, because the matrix is symmetric.
+    Scalar c0 = m(0,0)*m(1,1)*m(2,2) + Scalar(2)*m(1,0)*m(2,0)*m(2,1) - m(0,0)*m(2,1)*m(2,1) - m(1,1)*m(2,0)*m(2,0) - m(2,2)*m(1,0)*m(1,0);
+    Scalar c1 = m(0,0)*m(1,1) - m(1,0)*m(1,0) + m(0,0)*m(2,2) - m(2,0)*m(2,0) + m(1,1)*m(2,2) - m(2,1)*m(2,1);
+    Scalar c2 = m(0,0) + m(1,1) + m(2,2);
+
+    // Construct the parameters used in classifying the roots of the equation
+    // and in solving the equation for the roots in closed form.
+    Scalar c2_over_3 = c2*s_inv3;
+    Scalar a_over_3 = (c1 - c2*c2_over_3)*s_inv3;
+    if (a_over_3 > Scalar(0))
+      a_over_3 = Scalar(0);
+
+    Scalar half_b = Scalar(0.5)*(c0 + c2_over_3*(Scalar(2)*c2_over_3*c2_over_3 - c1));
+
+    Scalar q = half_b*half_b + a_over_3*a_over_3*a_over_3;
+    if (q > Scalar(0))
+      q = Scalar(0);
+
+    // Compute the eigenvalues by solving for the roots of the polynomial.
+    Scalar rho = sqrt(-a_over_3);
+    Scalar theta = atan2(sqrt(-q),half_b)*s_inv3;
+    Scalar cos_theta = cos(theta);
+    Scalar sin_theta = sin(theta);
+    roots(0) = c2_over_3 + Scalar(2)*rho*cos_theta;
+    roots(1) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta);
+    roots(2) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta);
+
+    // Sort in increasing order.
+    if (roots(0) >= roots(1))
+      std::swap(roots(0),roots(1));
+    if (roots(1) >= roots(2))
+    {
+      std::swap(roots(1),roots(2));
+      if (roots(0) >= roots(1))
+        std::swap(roots(0),roots(1));
+    }
+  }
+  
+  inline static void run(SolverType& solver, const MatrixType& mat, int options)
+  {
+    using std::sqrt;
+    eigen_assert(mat.cols() == 3 && mat.cols() == mat.rows());
+    eigen_assert((options&~(EigVecMask|GenEigMask))==0
+            && (options&EigVecMask)!=EigVecMask
+            && "invalid option parameter");
+    bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
+    
+    MatrixType& eivecs = solver.m_eivec;
+    VectorType& eivals = solver.m_eivalues;
+  
+    // map the matrix coefficients to [-1:1] to avoid over- and underflow.
+    Scalar scale = mat.cwiseAbs().maxCoeff();
+    MatrixType scaledMat = mat / scale;
+
+    // compute the eigenvalues
+    computeRoots(scaledMat,eivals);
+
+    // compute the eigen vectors
+    if(computeEigenvectors)
+    {
+      Scalar safeNorm2 = Eigen::NumTraits<Scalar>::epsilon();
+      safeNorm2 *= safeNorm2;
+      if((eivals(2)-eivals(0))<=Eigen::NumTraits<Scalar>::epsilon())
+      {
+        eivecs.setIdentity();
+      }
+      else
+      {
+        scaledMat = scaledMat.template selfadjointView<Lower>();
+        MatrixType tmp;
+        tmp = scaledMat;
+
+        Scalar d0 = eivals(2) - eivals(1);
+        Scalar d1 = eivals(1) - eivals(0);
+        int k =  d0 > d1 ? 2 : 0;
+        d0 = d0 > d1 ? d1 : d0;
+
+        tmp.diagonal().array () -= eivals(k);
+        VectorType cross;
+        Scalar n;
+        n = (cross = tmp.row(0).cross(tmp.row(1))).squaredNorm();
+
+        if(n>safeNorm2)
+          eivecs.col(k) = cross / sqrt(n);
+        else
+        {
+          n = (cross = tmp.row(0).cross(tmp.row(2))).squaredNorm();
+
+          if(n>safeNorm2)
+            eivecs.col(k) = cross / sqrt(n);
+          else
+          {
+            n = (cross = tmp.row(1).cross(tmp.row(2))).squaredNorm();
+
+            if(n>safeNorm2)
+              eivecs.col(k) = cross / sqrt(n);
+            else
+            {
+              // the input matrix and/or the eigenvaues probably contains some inf/NaN,
+              // => exit
+              // scale back to the original size.
+              eivals *= scale;
+
+              solver.m_info = NumericalIssue;
+              solver.m_isInitialized = true;
+              solver.m_eigenvectorsOk = computeEigenvectors;
+              return;
+            }
+          }
+        }
+
+        tmp = scaledMat;
+        tmp.diagonal().array() -= eivals(1);
+
+        if(d0<=Eigen::NumTraits<Scalar>::epsilon())
+          eivecs.col(1) = eivecs.col(k).unitOrthogonal();
+        else
+        {
+          n = (cross = eivecs.col(k).cross(tmp.row(0).normalized())).squaredNorm();
+          if(n>safeNorm2)
+            eivecs.col(1) = cross / sqrt(n);
+          else
+          {
+            n = (cross = eivecs.col(k).cross(tmp.row(1))).squaredNorm();
+            if(n>safeNorm2)
+              eivecs.col(1) = cross / sqrt(n);
+            else
+            {
+              n = (cross = eivecs.col(k).cross(tmp.row(2))).squaredNorm();
+              if(n>safeNorm2)
+                eivecs.col(1) = cross / sqrt(n);
+              else
+              {
+                // we should never reach this point,
+                // if so the last two eigenvalues are likely to ve very closed to each other
+                eivecs.col(1) = eivecs.col(k).unitOrthogonal();
+              }
+            }
+          }
+
+          // make sure that eivecs[1] is orthogonal to eivecs[2]
+          Scalar d = eivecs.col(1).dot(eivecs.col(k));
+          eivecs.col(1) = (eivecs.col(1) - d * eivecs.col(k)).normalized();
+        }
+
+        eivecs.col(k==2 ? 0 : 2) = eivecs.col(k).cross(eivecs.col(1)).normalized();
+      }
+    }
+    // Rescale back to the original size.
+    eivals *= scale;
+    
+    solver.m_info = Success;
+    solver.m_isInitialized = true;
+    solver.m_eigenvectorsOk = computeEigenvectors;
+  }
+};
+
+// 2x2 direct eigenvalues decomposition, code from Hauke Heibel
+template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,2,false>
+{
+  typedef typename SolverType::MatrixType MatrixType;
+  typedef typename SolverType::RealVectorType VectorType;
+  typedef typename SolverType::Scalar Scalar;
+  
+  inline static void computeRoots(const MatrixType& m, VectorType& roots)
+  {
+    using std::sqrt;
+    const Scalar t0 = Scalar(0.5) * sqrt( abs2(m(0,0)-m(1,1)) + Scalar(4)*m(1,0)*m(1,0));
+    const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1));
+    roots(0) = t1 - t0;
+    roots(1) = t1 + t0;
+  }
+  
+  inline static void run(SolverType& solver, const MatrixType& mat, int options)
+  {
+    eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());
+    eigen_assert((options&~(EigVecMask|GenEigMask))==0
+            && (options&EigVecMask)!=EigVecMask
+            && "invalid option parameter");
+    bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
+    
+    MatrixType& eivecs = solver.m_eivec;
+    VectorType& eivals = solver.m_eivalues;
+  
+    // map the matrix coefficients to [-1:1] to avoid over- and underflow.
+    Scalar scale = mat.cwiseAbs().maxCoeff();
+    scale = (std::max)(scale,Scalar(1));
+    MatrixType scaledMat = mat / scale;
+    
+    // Compute the eigenvalues
+    computeRoots(scaledMat,eivals);
+    
+    // compute the eigen vectors
+    if(computeEigenvectors)
+    {
+      scaledMat.diagonal().array () -= eivals(1);
+      Scalar a2 = abs2(scaledMat(0,0));
+      Scalar c2 = abs2(scaledMat(1,1));
+      Scalar b2 = abs2(scaledMat(1,0));
+      if(a2>c2)
+      {
+        eivecs.col(1) << -scaledMat(1,0), scaledMat(0,0);
+        eivecs.col(1) /= sqrt(a2+b2);
+      }
+      else
+      {
+        eivecs.col(1) << -scaledMat(1,1), scaledMat(1,0);
+        eivecs.col(1) /= sqrt(c2+b2);
+      }
+
+      eivecs.col(0) << eivecs.col(1).unitOrthogonal();
+    }
+    
+    // Rescale back to the original size.
+    eivals *= scale;
+    
+    solver.m_info = Success;
+    solver.m_isInitialized = true;
+    solver.m_eigenvectorsOk = computeEigenvectors;
+  }
+};
+
+}
+
+template<typename MatrixType>
+SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
+::computeDirect(const MatrixType& matrix, int options)
+{
+  internal::direct_selfadjoint_eigenvalues<SelfAdjointEigenSolver,Size,NumTraits<Scalar>::IsComplex>::run(*this,matrix,options);
+  return *this;
+}
+
 namespace internal {
 template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
 static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n)
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -111,13 +111,13 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
  }

  /** \returns the minimal corner */
-  inline const VectorType& min() const { return m_min; }
+  inline const VectorType& (min)() const { return m_min; }
  /** \returns a non const reference to the minimal corner */
-  inline VectorType& min() { return m_min; }
+  inline VectorType& (min)() { return m_min; }
  /** \returns the maximal corner */
-  inline const VectorType& max() const { return m_max; }
+  inline const VectorType& (max)() const { return m_max; }
  /** \returns a non const reference to the maximal corner */
-  inline VectorType& max() { return m_max; }
+  inline VectorType& (max)() { return m_max; }

  /** \returns the center of the box */
  inline const CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>,
@@ -196,7 +196,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)

  /** \returns true if the box \a b is entirely inside the box \c *this. */
  inline bool contains(const AlignedBox& b) const
-  { return (m_min.array()<=b.min().array()).all() && (b.max().array()<=m_max.array()).all(); }
+  { return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }

  /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. */
  template<typename Derived>
@@ -287,8 +287,8 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
  template<typename OtherScalarType>
  inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
  {
-    m_min = other.min().template cast<Scalar>();
-    m_max = other.max().template cast<Scalar>();
+    m_min = (other.min)().template cast<Scalar>();
+    m_max = (other.max)().template cast<Scalar>();
  }

  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
@@ -349,4 +349,38 @@ inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const Align
  return dist2;
 }

+/** \defgroup alignedboxtypedefs Global aligned box typedefs
+  *
+  * \ingroup Geometry_Module
+  *
+  * Eigen defines several typedef shortcuts for most common aligned box types.
+  *
+  * The general patterns are the following:
+  *
+  * \c AlignedBoxSizeType where \c Size can be \c 1, \c 2,\c 3,\c 4 for fixed size boxes or \c X for dynamic size,
+  * and where \c Type can be \c i for integer, \c f for float, \c d for double.
+  *
+  * For example, \c AlignedBox3d is a fixed-size 3x3 aligned box type of doubles, and \c AlignedBoxXf is a dynamic-size aligned box of floats.
+  *
+  * \sa class AlignedBox
+  */
+
+#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)    \
+/** \ingroup alignedboxtypedefs */                                 \
+typedef AlignedBox<Type, Size>   AlignedBox##SizeSuffix##TypeSuffix;
+
+#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 1, 1) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X)
+
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int,                  i)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float,                f)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double,               d)
+
+#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_TYPEDEFS
+
 #endif // EIGEN_ALIGNEDBOX_H
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -171,6 +171,9 @@ template<typename Scalar>
 template<typename QuatDerived>
 AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
 {
+  using std::acos;
+  using std::min;
+  using std::max;
  Scalar n2 = q.vec().squaredNorm();
  if (n2 < NumTraits<Scalar>::dummy_precision()*NumTraits<Scalar>::dummy_precision())
  {
@@ -179,7 +182,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived
  }
  else
  {
-    m_angle = Scalar(2)*std::acos(std::min(std::max(Scalar(-1),q.w()),Scalar(1)));
+    m_angle = Scalar(2)*acos((min)((max)(Scalar(-1),q.w()),Scalar(1)));
    m_axis = q.vec() / internal::sqrt(n2);
  }
  return *this;
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -189,7 +189,7 @@ public:
    *
    * \note If \a other is approximately parallel to *this, this method will return any point on *this.
    */
-  VectorType intersection(const Hyperplane& other)
+  VectorType intersection(const Hyperplane& other) const
  {
    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
    Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
@@ -213,8 +213,8 @@ public:
  /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this.
    *
    * \param mat the Dim x Dim transformation matrix
-    * \param traits specifies whether the matrix \a mat represents an Isometry
-    *               or a more generic Affine transformation. The default is Affine.
+    * \param traits specifies whether the matrix \a mat represents an #Isometry
+    *               or a more generic #Affine transformation. The default is #Affine.
    */
  template<typename XprType>
  inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
@@ -233,8 +233,8 @@ public:
  /** Applies the transformation \a t to \c *this and returns a reference to \c *this.
    *
    * \param t the transformation of dimension Dim
-    * \param traits specifies whether the transformation \a t represents an Isometry
-    *               or a more generic Affine transformation. The default is Affine.
+    * \param traits specifies whether the transformation \a t represents an #Isometry
+    *               or a more generic #Affine transformation. The default is #Affine.
    *               Other kind of transformations are not supported.
    */
  template<int TrOptions>
--- a/Eigen/src/Geometry/ParametrizedLine.h
+++ b/Eigen/src/Geometry/ParametrizedLine.h
@@ -34,7 +34,7 @@
  *
  * A parametrized line is defined by an origin point \f$ \mathbf{o} \f$ and a unit
  * direction vector \f$ \mathbf{d} \f$ such that the line corresponds to
-  * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ l \in \mathbf{R} \f$.
+  * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$.
  *
  * \param _Scalar the scalar type, i.e., the type of the coefficients
  * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
@@ -107,7 +107,7 @@ public:
  { return origin() + direction().dot(p-origin()) * direction(); }

  template <int OtherOptions>
-  Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
+  Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;

  /** \returns \c *this with scalar type casted to \a NewScalarType
    *
@@ -159,7 +159,7 @@ inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const H
  */
 template <typename _Scalar, int _AmbientDim, int _Options>
 template <int OtherOptions>
-inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane)
+inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
 {
  return -(hyperplane.offset()+hyperplane.normal().dot(origin()))
          / hyperplane.normal().dot(direction());
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -49,6 +49,9 @@ public:
  typedef typename internal::traits<Derived>::Scalar Scalar;
  typedef typename NumTraits<Scalar>::Real RealScalar;
  typedef typename internal::traits<Derived>::Coefficients Coefficients;
+  enum {
+    Flags = Eigen::internal::traits<Derived>::Flags
+  };

 // typedef typename Matrix<Scalar,4,1> Coefficients;
  /** the type of a 3D vector */
@@ -179,10 +182,9 @@ public:
  template<typename NewScalarType>
  inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
  {
-    return typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type(
-      coeffs().template cast<NewScalarType>());
+    return typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type(derived());
  }
-  
+
 #ifdef EIGEN_QUATERNIONBASE_PLUGIN
 # include EIGEN_QUATERNIONBASE_PLUGIN
 #endif
@@ -222,21 +224,25 @@ struct traits<Quaternion<_Scalar,_Options> >
  typedef _Scalar Scalar;
  typedef Matrix<_Scalar,4,1,_Options> Coefficients;
  enum{
-    PacketAccess = _Options & DontAlign ? Unaligned : Aligned
+    IsAligned = internal::traits<Coefficients>::Flags & AlignedBit,
+    Flags = IsAligned ? (AlignedBit | LvalueBit) : LvalueBit
  };
 };
 }

 template<typename _Scalar, int _Options>
-class Quaternion : public QuaternionBase<Quaternion<_Scalar,_Options> >{
+class Quaternion : public QuaternionBase<Quaternion<_Scalar,_Options> >
+{
  typedef QuaternionBase<Quaternion<_Scalar,_Options> > Base;
+  enum { IsAligned = internal::traits<Quaternion>::IsAligned };
+
 public:
  typedef _Scalar Scalar;

  EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Quaternion)
  using Base::operator*=;

-  typedef typename internal::traits<Quaternion<Scalar,_Options> >::Coefficients Coefficients;
+  typedef typename internal::traits<Quaternion>::Coefficients Coefficients;
  typedef typename Base::AngleAxisType AngleAxisType;

  /** Default constructor leaving the quaternion uninitialized. */
@@ -267,9 +273,16 @@ public:
  template<typename Derived>
  explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }

+  /** Explicit copy constructor with scalar conversion */
+  template<typename OtherScalar, int OtherOptions>
+  explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
+  { m_coeffs = other.coeffs().template cast<Scalar>(); }
+
  inline Coefficients& coeffs() { return m_coeffs;}
  inline const Coefficients& coeffs() const { return m_coeffs;}

+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(IsAligned)
+
 protected:
  Coefficients m_coeffs;
  
@@ -294,33 +307,53 @@ typedef Quaternion<double> Quaterniond;
 ***************************************************************************/

 namespace internal {
-template<typename _Scalar, int _PacketAccess>
-struct traits<Map<Quaternion<_Scalar>, _PacketAccess> >:
-traits<Quaternion<_Scalar> >
-{
-  typedef _Scalar Scalar;
-  typedef Map<Matrix<_Scalar,4,1>, _PacketAccess> Coefficients;
-  enum {
-    PacketAccess = _PacketAccess
+  template<typename _Scalar, int _Options>
+  struct traits<Map<Quaternion<_Scalar>, _Options> >:
+  traits<Quaternion<_Scalar, _Options> >
+  {
+    typedef _Scalar Scalar;
+    typedef Map<Matrix<_Scalar,4,1>, _Options> Coefficients;
+
+    typedef traits<Quaternion<_Scalar, _Options> > TraitsBase;
+    enum {
+      IsAligned = TraitsBase::IsAligned,
+
+      Flags = TraitsBase::Flags
+    };
+  };
+}
+
+namespace internal {
+  template<typename _Scalar, int _Options>
+  struct traits<Map<const Quaternion<_Scalar>, _Options> >:
+  traits<Quaternion<_Scalar> >
+  {
+    typedef _Scalar Scalar;
+    typedef Map<const Matrix<_Scalar,4,1>, _Options> Coefficients;
+
+    typedef traits<Quaternion<_Scalar, _Options> > TraitsBase;
+    enum {
+      IsAligned = TraitsBase::IsAligned,
+      Flags = TraitsBase::Flags & ~LvalueBit
+    };
  };
-};
 }

 /** \brief Quaternion expression mapping a constant memory buffer
  *
  * \param _Scalar the type of the Quaternion coefficients
-  * \param PacketAccess see class Map
+  * \param _Options see class Map
  *
  * This is a specialization of class Map for Quaternion. This class allows to view
  * a 4 scalar memory buffer as an Eigen's Quaternion object.
  *
  * \sa class Map, class Quaternion, class QuaternionBase
  */
-template<typename _Scalar, int PacketAccess>
-class Map<const Quaternion<_Scalar>, PacketAccess >
-  : public QuaternionBase<Map<const Quaternion<_Scalar>, PacketAccess> >
+template<typename _Scalar, int _Options>
+class Map<const Quaternion<_Scalar>, _Options >
+  : public QuaternionBase<Map<const Quaternion<_Scalar>, _Options> >
 {
-    typedef QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> > Base;
+    typedef QuaternionBase<Map<const Quaternion<_Scalar>, _Options> > Base;

  public:
    typedef _Scalar Scalar;
@@ -333,7 +366,7 @@ class Map<const Quaternion<_Scalar>, PacketAccess >
      * The pointer \a coeffs must reference the four coeffecients of Quaternion in the following order:
      * \code *coeffs == {x, y, z, w} \endcode
      *
-      * If the template parameter PacketAccess is set to Aligned, then the pointer coeffs must be aligned. */
+      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
    EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}

    inline const Coefficients& coeffs() const { return m_coeffs;}
@@ -345,18 +378,18 @@ class Map<const Quaternion<_Scalar>, PacketAccess >
 /** \brief Expression of a quaternion from a memory buffer
  *
  * \param _Scalar the type of the Quaternion coefficients
-  * \param PacketAccess see class Map
+  * \param _Options see class Map
  *
  * This is a specialization of class Map for Quaternion. This class allows to view
  * a 4 scalar memory buffer as an Eigen's  Quaternion object.
  *
  * \sa class Map, class Quaternion, class QuaternionBase
  */
-template<typename _Scalar, int PacketAccess>
-class Map<Quaternion<_Scalar>, PacketAccess >
-  : public QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> >
+template<typename _Scalar, int _Options>
+class Map<Quaternion<_Scalar>, _Options >
+  : public QuaternionBase<Map<Quaternion<_Scalar>, _Options> >
 {
-    typedef QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> > Base;
+    typedef QuaternionBase<Map<Quaternion<_Scalar>, _Options> > Base;

  public:
    typedef _Scalar Scalar;
@@ -369,7 +402,7 @@ class Map<Quaternion<_Scalar>, PacketAccess >
      * The pointer \a coeffs must reference the four coeffecients of Quaternion in the following order:
      * \code *coeffs == {x, y, z, w} \endcode
      *
-      * If the template parameter PacketAccess is set to Aligned, then the pointer coeffs must be aligned. */
+      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
    EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}

    inline Coefficients& coeffs() { return m_coeffs; }
@@ -399,7 +432,7 @@ typedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;
 // Generic Quaternion * Quaternion product
 // This product can be specialized for a given architecture via the Arch template argument.
 namespace internal {
-template<int Arch, class Derived1, class Derived2, typename Scalar, int PacketAccess> struct quat_product
+template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product
 {
  EIGEN_STRONG_INLINE static Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
    return Quaternion<Scalar>
@@ -423,7 +456,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
  return internal::quat_product<Architecture::Target, Derived, OtherDerived,
                         typename internal::traits<Derived>::Scalar,
-                         internal::traits<Derived>::PacketAccess && internal::traits<OtherDerived>::PacketAccess>::run(*this, other);
+                         internal::traits<Derived>::IsAligned && internal::traits<OtherDerived>::IsAligned>::run(*this, other);
 }

 /** \sa operator*(Quaternion) */
@@ -551,6 +584,7 @@ template<class Derived>
 template<typename Derived1, typename Derived2>
 inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
 {
+  using std::max;
  Vector3 v0 = a.normalized();
  Vector3 v1 = b.normalized();
  Scalar c = v1.dot(v0);
@@ -565,7 +599,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
  //    which yields a singular value problem
  if (c < Scalar(-1)+NumTraits<Scalar>::dummy_precision())
  {
-    c = std::max<Scalar>(c,-1);
+    c = max<Scalar>(c,-1);
    Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
    JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
    Vector3 axis = svd.matrixV().col(2);
@@ -625,10 +659,11 @@ template <class OtherDerived>
 inline typename internal::traits<Derived>::Scalar
 QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
 {
+  using std::acos;
  double d = internal::abs(this->dot(other));
  if (d>=1.0)
    return Scalar(0);
-  return static_cast<Scalar>(2 * std::acos(d));
+  return static_cast<Scalar>(2 * acos(d));
 }

 /** \returns the spherical linear interpolation between the two quaternions
@@ -639,6 +674,7 @@ template <class OtherDerived>
 Quaternion<typename internal::traits<Derived>::Scalar>
 QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const
 {
+  using std::acos;
  static const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();
  Scalar d = this->dot(other);
  Scalar absD = internal::abs(d);
@@ -654,7 +690,7 @@ QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& oth
  else
  {
    // theta is the angle between the 2 quaternions
-    Scalar theta = std::acos(absD);
+    Scalar theta = acos(absD);
    Scalar sinTheta = internal::sin(theta);

    scale0 = internal::sin( ( Scalar(1) - t ) * theta) / sinTheta;
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -86,11 +86,11 @@ template<typename TransformType> struct transform_take_affine_part;
  * \tparam _Scalar the scalar type, i.e., the type of the coefficients
  * \tparam _Dim the dimension of the space
  * \tparam _Mode the type of the transformation. Can be:
-  *              - Affine: the transformation is stored as a (Dim+1)^2 matrix,
-  *                        where the last row is assumed to be [0 ... 0 1].
-  *              - AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.
-  *              - Projective: the transformation is stored as a (Dim+1)^2 matrix
-  *                            without any assumption.
+  *              - #Affine: the transformation is stored as a (Dim+1)^2 matrix,
+  *                         where the last row is assumed to be [0 ... 0 1].
+  *              - #AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.
+  *              - #Projective: the transformation is stored as a (Dim+1)^2 matrix
+  *                             without any assumption.
  * \tparam _Options has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor.
  *                  These Options are passed directly to the underlying matrix type.
  *
@@ -279,6 +279,9 @@ public:
  template<typename OtherDerived>
  inline explicit Transform(const EigenBase<OtherDerived>& other)
  {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
+      YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
+
    check_template_params();
    internal::transform_construct_from_matrix<OtherDerived,Mode,Options,Dim,HDim>::run(this, other.derived());
  }
@@ -287,6 +290,9 @@ public:
  template<typename OtherDerived>
  inline Transform& operator=(const EigenBase<OtherDerived>& other)
  {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
+      YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
+
    internal::transform_construct_from_matrix<OtherDerived,Mode,Options,Dim,HDim>::run(this, other.derived());
    return *this;
  }
@@ -672,7 +678,7 @@ Transform<Scalar,Dim,Mode,Options>::Transform(const QMatrix& other)
  *
  * This function is available only if the token EIGEN_QT_SUPPORT is defined.
  */
-template<typename Scalar, int Dim, int Mode,int Otpions>
+template<typename Scalar, int Dim, int Mode,int Options>
 Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other)
 {
  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -718,9 +724,13 @@ Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator
 {
  check_template_params();
  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
-  m_matrix << other.m11(), other.m21(), other.dx(),
-              other.m12(), other.m22(), other.dy(),
-              other.m13(), other.m23(), other.m33();
+  if (Mode == int(AffineCompact))
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy();
+  else
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy(),
+                other.m13(), other.m23(), other.m33();
  return *this;
 }

@@ -732,9 +742,14 @@ template<typename Scalar, int Dim, int Mode, int Options>
 QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const
 {
  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
-  return QTransform(matrix.coeff(0,0), matrix.coeff(1,0), matrix.coeff(2,0)
-                    matrix.coeff(0,1), matrix.coeff(1,1), matrix.coeff(2,1)
-                    matrix.coeff(0,2), matrix.coeff(1,2), matrix.coeff(2,2));
+  if (Mode == int(AffineCompact))
+    return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0),
+                      m_matrix.coeff(0,1), m_matrix.coeff(1,1),
+                      m_matrix.coeff(0,2), m_matrix.coeff(1,2));
+  else
+    return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(2,0),
+                      m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(2,1),
+                      m_matrix.coeff(0,2), m_matrix.coeff(1,2), m_matrix.coeff(2,2));
 }
 #endif

@@ -1082,10 +1097,10 @@ struct projective_transform_inverse<TransformType, Projective>
  *
  * \param hint allows to optimize the inversion process when the transformation
  * is known to be not a general transformation (optional). The possible values are:
-  *  - Projective if the transformation is not necessarily affine, i.e., if the
+  *  - #Projective if the transformation is not necessarily affine, i.e., if the
  *    last row is not guaranteed to be [0 ... 0 1]
-  *  - Affine if the last row can be assumed to be [0 ... 0 1]
-  *  - Isometry if the transformation is only a concatenations of translations
+  *  - #Affine if the last row can be assumed to be [0 ... 0 1]
+  *  - #Isometry if the transformation is only a concatenations of translations
  *    and rotations.
  *  The default is the template class parameter \c Mode.
  *
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -54,6 +54,8 @@ public:
  typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
  /** corresponding affine transformation type */
  typedef Transform<Scalar,Dim,Affine> AffineTransformType;
+  /** corresponding isometric transformation type */
+  typedef Transform<Scalar,Dim,Isometry> IsometryTransformType;

 protected:

@@ -114,8 +116,8 @@ public:

  /** Concatenates a translation and a rotation */
  template<typename Derived>
-  inline AffineTransformType operator*(const RotationBase<Derived,Dim>& r) const
-  { return *this * r.toRotationMatrix(); }
+  inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
+  { return *this * IsometryTransformType(r); }

  /** \returns the concatenation of a linear transformation \a l with the translation \a t */
  // its a nightmare to define a templated friend function outside its declaration
--- a/Eigen/src/Householder/Householder.h
+++ b/Eigen/src/Householder/Householder.h
@@ -72,8 +72,9 @@ void MatrixBase<Derived>::makeHouseholder(

  if(tailSqNorm == RealScalar(0) && internal::imag(c0)==RealScalar(0))
  {
-    tau = 0;
+    tau = RealScalar(0);
    beta = internal::real(c0);
+    essential.setZero();
  }
  else
  {
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@@ -237,13 +237,20 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
    ConjugateReturnType inverse() const { return adjoint(); }

    /** \internal */
-    template<typename DestType> void evalTo(DestType& dst) const
+    template<typename DestType> inline void evalTo(DestType& dst) const
    {
-      Index vecs = m_length;
-      // FIXME find a way to pass this temporary if the user wants to
      Matrix<Scalar, DestType::RowsAtCompileTime, 1,
-             AutoAlign|ColMajor, DestType::MaxRowsAtCompileTime, 1> temp(rows());
-      if(    internal::is_same<typename internal::remove_all<VectorsType>::type,DestType>::value
+             AutoAlign|ColMajor, DestType::MaxRowsAtCompileTime, 1> workspace(rows());
+      evalTo(dst, workspace);
+    }
+
+    /** \internal */
+    template<typename Dest, typename Workspace>
+    void evalTo(Dest& dst, Workspace& workspace) const
+    {
+      workspace.resize(rows());
+      Index vecs = m_length;
+      if(    internal::is_same<typename internal::remove_all<VectorsType>::type,Dest>::value
          && internal::extract_data(dst) == internal::extract_data(m_vectors))
      {
        // in-place
@@ -254,10 +261,10 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
          Index cornerSize = rows() - k - m_shift;
          if(m_trans)
            dst.bottomRightCorner(cornerSize, cornerSize)
-            .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), &temp.coeffRef(0));
+               .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), workspace.data());
          else
            dst.bottomRightCorner(cornerSize, cornerSize)
-              .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), &temp.coeffRef(0));
+               .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), workspace.data());

          // clear the off diagonal vector
          dst.col(k).tail(rows()-k-1).setZero();
@@ -274,10 +281,10 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
          Index cornerSize = rows() - k - m_shift;
          if(m_trans)
            dst.bottomRightCorner(cornerSize, cornerSize)
-            .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), &temp.coeffRef(0));
+               .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), &workspace.coeffRef(0));
          else
            dst.bottomRightCorner(cornerSize, cornerSize)
-              .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), &temp.coeffRef(0));
+               .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), &workspace.coeffRef(0));
        }
      }
    }
@@ -285,24 +292,40 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
    /** \internal */
    template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
    {
-      Matrix<Scalar,1,Dest::RowsAtCompileTime> temp(dst.rows());
+      Matrix<Scalar,1,Dest::RowsAtCompileTime,RowMajor,1,Dest::MaxRowsAtCompileTime> workspace(dst.rows());
+      applyThisOnTheRight(dst, workspace);
+    }
+
+    /** \internal */
+    template<typename Dest, typename Workspace>
+    inline void applyThisOnTheRight(Dest& dst, Workspace& workspace) const
+    {
+      workspace.resize(dst.rows());
      for(Index k = 0; k < m_length; ++k)
      {
        Index actual_k = m_trans ? m_length-k-1 : k;
        dst.rightCols(rows()-m_shift-actual_k)
-           .applyHouseholderOnTheRight(essentialVector(actual_k), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
+           .applyHouseholderOnTheRight(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
      }
    }

    /** \internal */
    template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
    {
-      Matrix<Scalar,1,Dest::ColsAtCompileTime> temp(dst.cols());
+      Matrix<Scalar,1,Dest::ColsAtCompileTime,RowMajor,1,Dest::MaxColsAtCompileTime> workspace(dst.cols());
+      applyThisOnTheLeft(dst, workspace);
+    }
+
+    /** \internal */
+    template<typename Dest, typename Workspace>
+    inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const
+    {
+      workspace.resize(dst.cols());
      for(Index k = 0; k < m_length; ++k)
      {
        Index actual_k = m_trans ? k : m_length-k-1;
        dst.bottomRows(rows()-m_shift-actual_k)
-           .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
+           .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
      }
    }

--- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
+++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
@@ -0,0 +1,141 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_BASIC_PRECONDITIONERS_H
+#define EIGEN_BASIC_PRECONDITIONERS_H
+
+/** \ingroup IterativeLinearSolvers_Module
+  * \brief A preconditioner based on the digonal entries
+  *
+  * This class allows to approximately solve for A.x = b problems assuming A is a diagonal matrix.
+  * In other words, this preconditioner neglects all off diagonal entries and, in Eigen's language, solves for:
+  * \code
+  * A.diagonal().asDiagonal() . x = b
+  * \endcode
+  *
+  * \tparam _Scalar the type of the scalar.
+  *
+  * This preconditioner is suitable for both selfadjoint and general problems.
+  * The diagonal entries are pre-inverted and stored into a dense vector.
+  *
+  * \note A variant that has yet to be implemented would attempt to preserve the norm of each column.
+  *
+  */
+template <typename _Scalar>
+class DiagonalPreconditioner
+{
+    typedef _Scalar Scalar;
+    typedef Matrix<Scalar,Dynamic,1> Vector;
+    typedef typename Vector::Index Index;
+
+  public:
+    typedef Matrix<Scalar,Dynamic,Dynamic> MatrixType;
+
+    DiagonalPreconditioner() : m_isInitialized(false) {}
+
+    template<typename MatrixType>
+    DiagonalPreconditioner(const MatrixType& mat) : m_invdiag(mat.cols())
+    {
+      compute(mat);
+    }
+
+    Index rows() const { return m_invdiag.size(); }
+    Index cols() const { return m_invdiag.size(); }
+
+    template<typename MatrixType>
+    DiagonalPreconditioner& compute(const MatrixType& mat)
+    {
+      m_invdiag.resize(mat.cols());
+      for(int j=0; j<mat.outerSize(); ++j)
+      {
+        typename MatrixType::InnerIterator it(mat,j);
+        while(it && it.index()!=j) ++it;
+        if(it && it.index()==j)
+          m_invdiag(j) = Scalar(1)/it.value();
+        else
+          m_invdiag(j) = 0;
+      }
+      m_isInitialized = true;
+      return *this;
+    }
+
+    template<typename Rhs, typename Dest>
+    void _solve(const Rhs& b, Dest& x) const
+    {
+      x = m_invdiag.array() * b.array() ;
+    }
+
+    template<typename Rhs> inline const internal::solve_retval<DiagonalPreconditioner, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "DiagonalPreconditioner is not initialized.");
+      eigen_assert(m_invdiag.size()==b.rows()
+                && "DiagonalPreconditioner::solve(): invalid number of rows of the right hand side matrix b");
+      return internal::solve_retval<DiagonalPreconditioner, Rhs>(*this, b.derived());
+    }
+
+  protected:
+    Vector m_invdiag;
+    bool m_isInitialized;
+};
+
+namespace internal {
+
+template<typename _MatrixType, typename Rhs>
+struct solve_retval<DiagonalPreconditioner<_MatrixType>, Rhs>
+  : solve_retval_base<DiagonalPreconditioner<_MatrixType>, Rhs>
+{
+  typedef DiagonalPreconditioner<_MatrixType> Dec;
+  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    dec()._solve(rhs(),dst);
+  }
+};
+
+}
+
+/** \ingroup IterativeLinearSolvers_Module
+  * \brief A naive preconditioner which approximates any matrix as the identity matrix
+  *
+  * \sa class DiagonalPreconditioner
+  */
+class IdentityPreconditioner
+{
+  public:
+
+    IdentityPreconditioner() {}
+
+    template<typename MatrixType>
+    IdentityPreconditioner(const MatrixType& ) {}
+
+    template<typename MatrixType>
+    IdentityPreconditioner& compute(const MatrixType& ) { return *this; }
+    
+    template<typename Rhs>
+    inline const Rhs& solve(const Rhs& b) const { return b; }
+};
+
+#endif // EIGEN_BASIC_PRECONDITIONERS_H
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -0,0 +1,262 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_BICGSTAB_H
+#define EIGEN_BICGSTAB_H
+
+namespace internal {
+
+/** \internal Low-level bi conjugate gradient stabilized algorithm
+  * \param mat The matrix A
+  * \param rhs The right hand side vector b
+  * \param x On input and initial solution, on output the computed solution.
+  * \param precond A preconditioner being able to efficiently solve for an
+  *                approximation of Ax=b (regardless of b)
+  * \param iters On input the max number of iteration, on output the number of performed iterations.
+  * \param tol_error On input the tolerance error, on output an estimation of the relative error.
+  */
+template<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
+void bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,
+              const Preconditioner& precond, int& iters,
+              typename Dest::RealScalar& tol_error)
+{
+  using std::sqrt;
+  using std::abs;
+  typedef typename Dest::RealScalar RealScalar;
+  typedef typename Dest::Scalar Scalar;
+  typedef Matrix<Scalar,Dynamic,1> VectorType;
+  
+  RealScalar tol = tol_error;
+  int maxIters = iters;
+
+  int n = mat.cols();
+  VectorType r  = rhs - mat * x;
+  VectorType r0 = r;
+  RealScalar r0_sqnorm = r0.squaredNorm();
+  Scalar rho    = 1;
+  Scalar alpha  = 1;
+  Scalar w      = 1;
+  
+  VectorType v = VectorType::Zero(n), p = VectorType::Zero(n);
+  VectorType y(n),  z(n);
+  VectorType kt(n), ks(n);
+
+  VectorType s(n), t(n);
+
+  RealScalar tol2 = tol*tol;
+  int i = 0;
+
+  do
+  {
+    Scalar rho_old = rho;
+
+    rho = r0.dot(r);
+    Scalar beta = (rho/rho_old) * (alpha / w);
+    p = r + beta * (p - w * v);
+    
+    y = precond.solve(p);
+    v.noalias() = mat * y;
+
+    alpha = rho / r0.dot(v);
+    s = r - alpha * v;
+
+    z = precond.solve(s);
+    t.noalias() = mat * z;
+
+    kt = precond.solve(t);
+    ks = precond.solve(s);
+
+    w = kt.dot(ks) / kt.squaredNorm();
+    x += alpha * y + w * z;
+    r = s - w * t;
+    ++i;
+  } while ( r.squaredNorm()/r0_sqnorm > tol2 && i<maxIters );
+  
+  tol_error = sqrt(r.squaredNorm()/r0_sqnorm);
+  //tol_error = sqrt(abs(absNew / absInit));
+  iters = i;
+}
+
+}
+
+template< typename _MatrixType,
+          typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> >
+class BiCGSTAB;
+
+namespace internal {
+
+template< typename _MatrixType, typename _Preconditioner>
+struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
+{
+  typedef _MatrixType MatrixType;
+  typedef _Preconditioner Preconditioner;
+};
+
+}
+
+/** \ingroup IterativeLinearSolvers_Module
+  * \brief A bi conjugate gradient stabilized solver for sparse square problems
+  *
+  * This class allows to solve for A.x = b sparse linear problems using a bi conjugate gradient
+  * stabilized algorithm. The vectors x and b can be either dense or sparse.
+  *
+  * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.
+  * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
+  *
+  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
+  * and setTolerance() methods. The default are 1000 max iterations and NumTraits<Scalar>::epsilon()
+  * for the tolerance.
+  * 
+  * This class can be used as the direct solver classes. Here is a typical usage example:
+  * \code
+  * int n = 10000;
+  * VectorXd x(n), b(n);
+  * SparseMatrix<double> A(n,n);
+  * // fill A and b
+  * BiCGSTAB<SparseMatrix<double> > solver;
+  * solver(A);
+  * x = solver.solve(b);
+  * std::cout << "#iterations:     " << solver.iterations() << std::endl;
+  * std::cout << "estimated error: " << solver.error()      << std::endl;
+  * // update b, and solve again
+  * x = solver.solve(b);
+  * \endcode
+  * 
+  * By default the iterations start with x=0 as an initial guess of the solution.
+  * One can control the start using the solveWithGuess() method. Here is a step by
+  * step execution example starting with a random guess and printing the evolution
+  * of the estimated error:
+  * * \code
+  * x = VectorXd::Random(n);
+  * solver.setMaxIterations(1);
+  * int i = 0;
+  * do {
+  *   x = solver.solveWithGuess(b,x);
+  *   std::cout << i << " : " << solver.error() << std::endl;
+  *   ++i;
+  * } while (solver.info()!=Success && i<100);
+  * \endcode
+  * Note that such a step by step excution is slightly slower.
+  * 
+  * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
+  */
+template< typename _MatrixType, typename _Preconditioner>
+class BiCGSTAB : public IterativeSolverBase<BiCGSTAB<_MatrixType,_Preconditioner> >
+{
+  typedef IterativeSolverBase<BiCGSTAB> Base;
+  using Base::mp_matrix;
+  using Base::m_error;
+  using Base::m_iterations;
+  using Base::m_info;
+  using Base::m_isInitialized;
+public:
+  typedef _MatrixType MatrixType;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef _Preconditioner Preconditioner;
+
+public:
+
+  /** Default constructor. */
+  BiCGSTAB() : Base() {}
+
+  /** Initialize the solver with matrix \a A for further \c Ax=b solving.
+    * 
+    * This constructor is a shortcut for the default constructor followed
+    * by a call to compute().
+    * 
+    * \warning this class stores a reference to the matrix A as well as some
+    * precomputed values that depend on it. Therefore, if \a A is changed
+    * this class becomes invalid. Call compute() to update it with the new
+    * matrix A, or modify a copy of A.
+    */
+  BiCGSTAB(const MatrixType& A) : Base(A) {}
+
+  ~BiCGSTAB() {}
+  
+  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
+    * \a x0 as an initial solution.
+    *
+    * \sa compute()
+    */
+  template<typename Rhs,typename Guess>
+  inline const internal::solve_retval_with_guess<BiCGSTAB, Rhs, Guess>
+  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
+  {
+    eigen_assert(m_isInitialized && "BiCGSTAB is not initialized.");
+    eigen_assert(Base::rows()==b.rows()
+              && "BiCGSTAB::solve(): invalid number of rows of the right hand side matrix b");
+    return internal::solve_retval_with_guess
+            <BiCGSTAB, Rhs, Guess>(*this, b.derived(), x0);
+  }
+  
+  /** \internal */
+  template<typename Rhs,typename Dest>
+  void _solveWithGuess(const Rhs& b, Dest& x) const
+  {    
+    for(int j=0; j<b.cols(); ++j)
+    {
+      m_iterations = Base::m_maxIterations;
+      m_error = Base::m_tolerance;
+      
+      typename Dest::ColXpr xj(x,j);
+      internal::bicgstab(*mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
+    }
+    
+    m_isInitialized = true;
+    m_info = m_error <= Base::m_tolerance ? Success : NoConvergence;
+  }
+
+  /** \internal */
+  template<typename Rhs,typename Dest>
+  void _solve(const Rhs& b, Dest& x) const
+  {
+    x.setOnes();
+    _solveWithGuess(b,x);
+  }
+
+protected:
+
+};
+
+
+namespace internal {
+
+  template<typename _MatrixType, typename _Preconditioner, typename Rhs>
+struct solve_retval<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs>
+  : solve_retval_base<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs>
+{
+  typedef BiCGSTAB<_MatrixType, _Preconditioner> Dec;
+  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    dec()._solve(rhs(),dst);
+  }
+};
+
+}
+
+#endif // EIGEN_BICGSTAB_H
--- a/Eigen/src/IterativeLinearSolvers/CMakeLists.txt
+++ b/Eigen/src/IterativeLinearSolvers/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_IterativeLinearSolvers_SRCS "*.h")
+
+INSTALL(FILES
+  ${Eigen_IterativeLinearSolvers_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/IterativeLinearSolvers COMPONENT Devel
+  )
--- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
+++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
@@ -0,0 +1,256 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_CONJUGATE_GRADIENT_H
+#define EIGEN_CONJUGATE_GRADIENT_H
+
+namespace internal {
+
+/** \internal Low-level conjugate gradient algorithm
+  * \param mat The matrix A
+  * \param rhs The right hand side vector b
+  * \param x On input and initial solution, on output the computed solution.
+  * \param precond A preconditioner being able to efficiently solve for an
+  *                approximation of Ax=b (regardless of b)
+  * \param iters On input the max number of iteration, on output the number of performed iterations.
+  * \param tol_error On input the tolerance error, on output an estimation of the relative error.
+  */
+template<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
+EIGEN_DONT_INLINE
+void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
+                        const Preconditioner& precond, int& iters,
+                        typename Dest::RealScalar& tol_error)
+{
+  using std::sqrt;
+  using std::abs;
+  typedef typename Dest::RealScalar RealScalar;
+  typedef typename Dest::Scalar Scalar;
+  typedef Matrix<Scalar,Dynamic,1> VectorType;
+  
+  RealScalar tol = tol_error;
+  int maxIters = iters;
+  
+  int n = mat.cols();
+  VectorType residual = rhs - mat * x; //initial residual
+  VectorType p(n);
+
+  p = precond.solve(residual);      //initial search direction
+
+  VectorType z(n), tmp(n);
+  RealScalar absNew = internal::real(residual.dot(p));  // the square of the absolute value of r scaled by invM
+  RealScalar absInit = absNew;          // the initial absolute value
+
+  int i = 0;
+  while ((i < maxIters) && (absNew > tol*tol*absInit))
+  {
+    tmp.noalias() = mat * p;              // the bottleneck of the algorithm
+
+    Scalar alpha = absNew / p.dot(tmp);   // the amount we travel on dir
+    x += alpha * p;                       // update solution
+    residual -= alpha * tmp;              // update residue
+    z = precond.solve(residual);          // approximately solve for "A z = residual"
+
+    RealScalar absOld = absNew;
+    absNew = internal::real(residual.dot(z));     // update the absolute value of r
+    RealScalar beta = absNew / absOld;            // calculate the Gram-Schmidit value used to create the new search direction
+    p = z + beta * p;                             // update search direction
+    i++;
+  }
+
+  tol_error = sqrt(abs(absNew / absInit));
+  iters = i;
+}
+
+}
+
+template< typename _MatrixType, int _UpLo=Lower,
+          typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> >
+class ConjugateGradient;
+
+namespace internal {
+
+template< typename _MatrixType, int _UpLo, typename _Preconditioner>
+struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
+{
+  typedef _MatrixType MatrixType;
+  typedef _Preconditioner Preconditioner;
+};
+
+}
+
+/** \ingroup IterativeLinearSolvers_Module
+  * \brief A conjugate gradient solver for sparse self-adjoint problems
+  *
+  * This class allows to solve for A.x = b sparse linear problems using a conjugate gradient algorithm.
+  * The sparse matrix A must be selfadjoint. The vectors x and b can be either dense or sparse.
+  *
+  * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.
+  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
+  *               or Upper. Default is Lower.
+  * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
+  *
+  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
+  * and setTolerance() methods. The default are 1000 max iterations and NumTraits<Scalar>::epsilon()
+  * for the tolerance.
+  * 
+  * This class can be used as the direct solver classes. Here is a typical usage example:
+  * \code
+  * int n = 10000;
+  * VectorXd x(n), b(n);
+  * SparseMatrix<double> A(n,n);
+  * // fill A and b
+  * ConjugateGradient<SparseMatrix<double> > cg;
+  * cg.compute(A);
+  * x = cg.solve(b);
+  * std::cout << "#iterations:     " << cg.iterations() << std::endl;
+  * std::cout << "estimated error: " << cg.error()      << std::endl;
+  * // update b, and solve again
+  * x = cg.solve(b);
+  * \endcode
+  * 
+  * By default the iterations start with x=0 as an initial guess of the solution.
+  * One can control the start using the solveWithGuess() method. Here is a step by
+  * step execution example starting with a random guess and printing the evolution
+  * of the estimated error:
+  * * \code
+  * x = VectorXd::Random(n);
+  * cg.setMaxIterations(1);
+  * int i = 0;
+  * do {
+  *   x = cg.solveWithGuess(b,x);
+  *   std::cout << i << " : " << cg.error() << std::endl;
+  *   ++i;
+  * } while (cg.info()!=Success && i<100);
+  * \endcode
+  * Note that such a step by step excution is slightly slower.
+  * 
+  * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
+  */
+template< typename _MatrixType, int _UpLo, typename _Preconditioner>
+class ConjugateGradient : public IterativeSolverBase<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
+{
+  typedef IterativeSolverBase<ConjugateGradient> Base;
+  using Base::mp_matrix;
+  using Base::m_error;
+  using Base::m_iterations;
+  using Base::m_info;
+  using Base::m_isInitialized;
+public:
+  typedef _MatrixType MatrixType;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef _Preconditioner Preconditioner;
+
+  enum {
+    UpLo = _UpLo
+  };
+
+public:
+
+  /** Default constructor. */
+  ConjugateGradient() : Base() {}
+
+  /** Initialize the solver with matrix \a A for further \c Ax=b solving.
+    * 
+    * This constructor is a shortcut for the default constructor followed
+    * by a call to compute().
+    * 
+    * \warning this class stores a reference to the matrix A as well as some
+    * precomputed values that depend on it. Therefore, if \a A is changed
+    * this class becomes invalid. Call compute() to update it with the new
+    * matrix A, or modify a copy of A.
+    */
+  ConjugateGradient(const MatrixType& A) : Base(A) {}
+
+  ~ConjugateGradient() {}
+  
+  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
+    * \a x0 as an initial solution.
+    *
+    * \sa compute()
+    */
+  template<typename Rhs,typename Guess>
+  inline const internal::solve_retval_with_guess<ConjugateGradient, Rhs, Guess>
+  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
+  {
+    eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
+    eigen_assert(Base::rows()==b.rows()
+              && "ConjugateGradient::solve(): invalid number of rows of the right hand side matrix b");
+    return internal::solve_retval_with_guess
+            <ConjugateGradient, Rhs, Guess>(*this, b.derived(), x0);
+  }
+
+  /** \internal */
+  template<typename Rhs,typename Dest>
+  void _solveWithGuess(const Rhs& b, Dest& x) const
+  {
+    m_iterations = Base::m_maxIterations;
+    m_error = Base::m_tolerance;
+
+    for(int j=0; j<b.cols(); ++j)
+    {
+      m_iterations = Base::m_maxIterations;
+      m_error = Base::m_tolerance;
+
+      typename Dest::ColXpr xj(x,j);
+      internal::conjugate_gradient(mp_matrix->template selfadjointView<UpLo>(), b.col(j), xj,
+                                   Base::m_preconditioner, m_iterations, m_error);
+    }
+
+    m_isInitialized = true;
+    m_info = m_error <= Base::m_tolerance ? Success : NoConvergence;
+  }
+  
+  /** \internal */
+  template<typename Rhs,typename Dest>
+  void _solve(const Rhs& b, Dest& x) const
+  {
+    x.setOnes();
+    _solveWithGuess(b,x);
+  }
+
+protected:
+
+};
+
+
+namespace internal {
+
+template<typename _MatrixType, int _UpLo, typename _Preconditioner, typename Rhs>
+struct solve_retval<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs>
+  : solve_retval_base<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs>
+{
+  typedef ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> Dec;
+  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    dec()._solve(rhs(),dst);
+  }
+};
+
+}
+
+#endif // EIGEN_CONJUGATE_GRADIENT_H
--- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
+++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
@@ -0,0 +1,226 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_ITERATIVE_SOLVER_BASE_H
+#define EIGEN_ITERATIVE_SOLVER_BASE_H
+
+
+/** \ingroup IterativeLinearSolvers_Module
+  * \brief Base class for linear iterative solvers
+  *
+  * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
+  */
+template< typename Derived>
+class IterativeSolverBase
+{
+public:
+  typedef typename internal::traits<Derived>::MatrixType MatrixType;
+  typedef typename internal::traits<Derived>::Preconditioner Preconditioner;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  typedef typename MatrixType::RealScalar RealScalar;
+
+public:
+
+  Derived& derived() { return *static_cast<Derived*>(this); }
+  const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+  /** Default constructor. */
+  IterativeSolverBase()
+    : mp_matrix(0)
+  {
+    init();
+  }
+
+  /** Initialize the solver with matrix \a A for further \c Ax=b solving.
+    * 
+    * This constructor is a shortcut for the default constructor followed
+    * by a call to compute().
+    * 
+    * \warning this class stores a reference to the matrix A as well as some
+    * precomputed values that depend on it. Therefore, if \a A is changed
+    * this class becomes invalid. Call compute() to update it with the new
+    * matrix A, or modify a copy of A.
+    */
+  IterativeSolverBase(const MatrixType& A)
+  {
+    init();
+    compute(A);
+  }
+
+  ~IterativeSolverBase() {}
+
+  /** Initializes the iterative solver with the matrix \a A for further solving \c Ax=b problems.
+    *
+    * Currently, this function mostly initialized/compute the preconditioner. In the future
+    * we might, for instance, implement column reodering for faster matrix vector products.
+    *
+    * \warning this class stores a reference to the matrix A as well as some
+    * precomputed values that depend on it. Therefore, if \a A is changed
+    * this class becomes invalid. Call compute() to update it with the new
+    * matrix A, or modify a copy of A.
+    */
+  Derived& compute(const MatrixType& A)
+  {
+    mp_matrix = &A;
+    m_preconditioner.compute(A);
+    m_isInitialized = true;
+    m_info = Success;
+    return derived();
+  }
+
+  /** \internal */
+  Index rows() const { return mp_matrix->rows(); }
+  /** \internal */
+  Index cols() const { return mp_matrix->cols(); }
+
+  /** \returns the tolerance threshold used by the stopping criteria */
+  RealScalar tolerance() const { return m_tolerance; }
+  
+  /** Sets the tolerance threshold used by the stopping criteria */
+  Derived& setTolerance(RealScalar tolerance)
+  {
+    m_tolerance = tolerance;
+    return derived();
+  }
+
+  /** \returns a read-write reference to the preconditioner for custom configuration. */
+  Preconditioner& preconditioner() { return m_preconditioner; }
+  
+  /** \returns a read-only reference to the preconditioner. */
+  const Preconditioner& preconditioner() const { return m_preconditioner; }
+
+  /** \returns the max number of iterations */
+  int maxIterations() const { return m_maxIterations; }
+  
+  /** Sets the max number of iterations */
+  Derived& setMaxIterations(int maxIters)
+  {
+    m_maxIterations = maxIters;
+    return derived();
+  }
+
+  /** \returns the number of iterations performed during the last solve */
+  int iterations() const
+  {
+    eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
+    return m_iterations;
+  }
+
+  /** \returns the tolerance error reached during the last solve */
+  RealScalar error() const
+  {
+    eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
+    return m_error;
+  }
+
+  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
+    *
+    * \sa compute()
+    */
+  template<typename Rhs> inline const internal::solve_retval<Derived, Rhs>
+  solve(const MatrixBase<Rhs>& b) const
+  {
+    eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized.");
+    eigen_assert(rows()==b.rows()
+              && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b");
+    return internal::solve_retval<Derived, Rhs>(derived(), b.derived());
+  }
+  
+  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
+    *
+    * \sa compute()
+    */
+  template<typename Rhs>
+  inline const internal::sparse_solve_retval<IterativeSolverBase, Rhs>
+  solve(const SparseMatrixBase<Rhs>& b) const
+  {
+    eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized.");
+    eigen_assert(rows()==b.rows()
+              && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b");
+    return internal::sparse_solve_retval<IterativeSolverBase, Rhs>(*this, b.derived());
+  }
+
+  /** \returns Success if the iterations converged, and NoConvergence otherwise. */
+  ComputationInfo info() const
+  {
+    eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized.");
+    return m_info;
+  }
+  
+  /** \internal */
+  template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
+  void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+  {
+    eigen_assert(rows()==b.rows());
+    
+    int rhsCols = b.cols();
+    int size = b.rows();
+    Eigen::Matrix<DestScalar,Dynamic,1> tb(size);
+    Eigen::Matrix<DestScalar,Dynamic,1> tx(size);
+    for(int k=0; k<rhsCols; ++k)
+    {
+      tb = b.col(k);
+      tx = derived().solve(tb);
+      dest.col(k) = tx.sparseView(0);
+    }
+  }
+
+protected:
+  void init()
+  {
+    m_isInitialized = false;
+    m_maxIterations = 1000;
+    m_tolerance = NumTraits<Scalar>::epsilon();
+  }
+  const MatrixType* mp_matrix;
+  Preconditioner m_preconditioner;
+
+  int m_maxIterations;
+  RealScalar m_tolerance;
+  
+  mutable RealScalar m_error;
+  mutable int m_iterations;
+  mutable ComputationInfo m_info;
+  mutable bool m_isInitialized;
+};
+
+namespace internal {
+ 
+template<typename Derived, typename Rhs>
+struct sparse_solve_retval<IterativeSolverBase<Derived>, Rhs>
+  : sparse_solve_retval_base<IterativeSolverBase<Derived>, Rhs>
+{
+  typedef IterativeSolverBase<Derived> Dec;
+  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    dec().derived()._solve_sparse(rhs(),dst);
+  }
+};
+
+}
+
+#endif // EIGEN_ITERATIVE_SOLVER_BASE_H
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -104,9 +104,9 @@ bool JacobiRotation<Scalar>::makeJacobi(RealScalar x, Scalar y, RealScalar z)
  else
  {
    RealScalar tau = (x-z)/(RealScalar(2)*internal::abs(y));
-    RealScalar w = internal::sqrt(internal::abs2(tau) + 1);
+    RealScalar w = internal::sqrt(internal::abs2(tau) + RealScalar(1));
    RealScalar t;
-    if(tau>0)
+    if(tau>RealScalar(0))
    {
      t = RealScalar(1) / (tau + w);
    }
@@ -114,8 +114,8 @@ bool JacobiRotation<Scalar>::makeJacobi(RealScalar x, Scalar y, RealScalar z)
    {
      t = RealScalar(1) / (tau - w);
    }
-    RealScalar sign_t = t > 0 ? 1 : -1;
-    RealScalar n = RealScalar(1) / internal::sqrt(internal::abs2(t)+1);
+    RealScalar sign_t = t > RealScalar(0) ? RealScalar(1) : RealScalar(-1);
+    RealScalar n = RealScalar(1) / internal::sqrt(internal::abs2(t)+RealScalar(1));
    m_s = - sign_t * (internal::conj(y) / internal::abs(y)) * internal::abs(t) * n;
    m_c = n;
    return true;
@@ -221,15 +221,15 @@ template<typename Scalar>
 void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type)
 {

-  if(q==0)
+  if(q==Scalar(0))
  {
    m_c = p<Scalar(0) ? Scalar(-1) : Scalar(1);
-    m_s = 0;
+    m_s = Scalar(0);
    if(r) *r = internal::abs(p);
  }
-  else if(p==0)
+  else if(p==Scalar(0))
  {
-    m_c = 0;
+    m_c = Scalar(0);
    m_s = q<Scalar(0) ? Scalar(1) : Scalar(-1);
    if(r) *r = internal::abs(q);
  }
--- a/Show More
+++ b/Show More