fix compilation for old but not so old versions of glew

bump
2026-04-10 11:34:33 +08:00 · 2011-03-18 10:26:21 +01:00 · 2011-03-18 05:13:34 -04:00
467 changed files with 16932 additions and 31612 deletions
--- a/.hgeol
+++ b/.hgeol
@@ -1,8 +1,3 @@
-[patterns]
-scripts/*.in = LF
-debug/msvc/*.dat = CRLF
-unsupported/test/mpreal/*.* = CRLF
-** = native
-
-[repository]
-native = LF
+[patterns]
+**.* = native
+eigen_autoexp_part.dat = CRLF
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,10 +64,6 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

 find_package(StandardMathLibrary)

-
-set(EIGEN_TEST_CUSTOM_LINKER_FLAGS  "" CACHE STRING "Additional linker flags when linking unit tests.")
-set(EIGEN_TEST_CUSTOM_CXX_FLAGS     "" CACHE STRING "Additional compiler flags when compiling unit tests.")
-
 set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")

 if(NOT STANDARD_MATH_LIBRARY_FOUND)
@@ -107,8 +103,6 @@ endif()

 add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")

-set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
-
 if(CMAKE_COMPILER_IS_GNUCXX)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
  set(CMAKE_CXX_FLAGS_DEBUG "-g3")
@@ -285,21 +279,9 @@ install(FILES
  )

 if(EIGEN_BUILD_PKGCONFIG)
-    SET(path_separator ":")
-    STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}")
-    message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib")
-    FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search})
-    if(pkg_config_libdir)
-        SET(pkg_config_install_dir ${pkg_config_libdir})
-        message(STATUS "found ${pkg_config_libdir}/pkgconfig" )
-    else(pkg_config_libdir)
-        SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share)
-        message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" )
-    endif(pkg_config_libdir)
-
    configure_file(eigen3.pc.in eigen3.pc)
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc
-        DESTINATION ${pkg_config_install_dir}/pkgconfig
+        DESTINATION share/pkgconfig
        )
 endif(EIGEN_BUILD_PKGCONFIG)

@@ -307,9 +289,44 @@ add_subdirectory(Eigen)

 add_subdirectory(doc EXCLUDE_FROM_ALL)

-include(EigenConfigureTesting)
-# fixme, not sure this line is still needed:
+add_custom_target(buildtests)
+add_custom_target(check COMMAND "ctest")
+add_dependencies(check buildtests)
+
+# CMake/Ctest does not allow us to change the build command,
+# so we have to workaround by directly editing the generated DartConfiguration.tcl file
+# save CMAKE_MAKE_PROGRAM
+set(CMAKE_MAKE_PROGRAM_SAVE ${CMAKE_MAKE_PROGRAM})
+# and set a fake one
+set(CMAKE_MAKE_PROGRAM "@EIGEN_MAKECOMMAND_PLACEHOLDER@")
+
+include(CTest)
 enable_testing() # must be called from the root CMakeLists, see man page
+include(EigenTesting)
+ei_init_testing()
+
+# overwrite default DartConfiguration.tcl
+# The worarounds are different for each version of the MSVC IDE
+if(MSVC_IDE)
+  if(MSVC_VERSION EQUAL 1600) # MSVC 2010
+    set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n # ")
+  else() # MSVC 2008 (TODO check MSVC 2005)
+    set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} /project buildtests")
+  endif()
+else()
+  # for make and nmake
+  set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests")
+endif()
+
+configure_file(${CMAKE_BINARY_DIR}/DartConfiguration.tcl ${CMAKE_BINARY_DIR}/DartConfiguration.tcl)
+# restore default CMAKE_MAKE_PROGRAM
+set(CMAKE_MAKE_PROGRAM ${CMAKE_MAKE_PROGRAM_SAVE})
+# un-set temporary variables so that it is like they never existed. 
+# CMake 2.6.3 introduces the more logical unset() syntax for this.
+set(CMAKE_MAKE_PROGRAM_SAVE) 
+set(EIGEN_MAKECOMMAND_PLACEHOLDER)
+
+configure_file(${CMAKE_SOURCE_DIR}/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake)


 if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
@@ -318,13 +335,15 @@ else()
  add_subdirectory(test EXCLUDE_FROM_ALL)
 endif()

-if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
-  add_subdirectory(blas)
-  add_subdirectory(lapack)
-else()
-  add_subdirectory(blas EXCLUDE_FROM_ALL)
-  add_subdirectory(lapack EXCLUDE_FROM_ALL)
-endif()
+if(NOT MSVC)
+  if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
+    add_subdirectory(blas)
+    add_subdirectory(lapack)
+  else()
+    add_subdirectory(blas EXCLUDE_FROM_ALL)
+    add_subdirectory(lapack EXCLUDE_FROM_ALL)
+  endif()
+endif(NOT MSVC)

 add_subdirectory(unsupported)

--- a/COPYING.BSD
+++ b/COPYING.BSD
@@ -1,26 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -24,9 +24,6 @@ namespace Eigen {
 #include "src/misc/Solve.h"
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
-#ifdef EIGEN_USE_LAPACKE
-#include "src/Cholesky/LLT_MKL.h"
-#endif

 } // namespace Eigen

--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -1,34 +0,0 @@
-#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H
-#define EIGEN_CHOLMODSUPPORT_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-extern "C" {
-  #include <cholmod.h>
-}
-
-namespace Eigen {
-
-/** \ingroup Support_modules
-  * \defgroup CholmodSupport_Module CholmodSupport module
-  *
-  *
-  * \code
-  * #include <Eigen/CholmodSupport>
-  * \endcode
-  */
-
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
-#include "src/CholmodSupport/CholmodSupport.h"
-
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_CHOLMODSUPPORT_MODULE_H
-
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -34,10 +34,6 @@
 // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
 #include "src/Core/util/Macros.h"

-// this include file manages BLAS and MKL related macros
-// and inclusion of their respective header files
-#include "src/Core/util/MKL_support.h"
-
 // if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
 // account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
 #if !EIGEN_ALIGN
@@ -55,16 +51,16 @@
      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
    #endif
  #endif
-#else
-  // Remember that usage of defined() in a #define is undefined by the standard
-  #if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
-    #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
-  #endif
+#endif
+
+// Remember that usage of defined() in a #define is undefined by the standard
+#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
+  #define EIGEN_SSE2_BUT_NOT_OLD_GCC
 #endif

 #ifndef EIGEN_DONT_VECTORIZE

-  #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+  #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)

    // Defines symbols for compile-time detection of which instructions are
    // used.
@@ -147,7 +143,6 @@
 #ifdef EIGEN_HAS_ERRNO
 #include <cerrno>
 #endif
-#include <cstddef>
 #include <cstdlib>
 #include <cmath>
 #include <complex>
@@ -171,7 +166,7 @@
  #include <intrin.h>
 #endif

-#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
  #define EIGEN_EXCEPTIONS
 #endif

@@ -179,7 +174,16 @@
  #include <new>
 #endif

-/** \brief Namespace containing all symbols from the %Eigen library. */
+// this needs to be done after all possible windows C header includes and before any Eigen source includes
+// (system C++ includes are supposed to be able to deal with this already):
+// windows.h defines min and max macros which would make Eigen fail to compile.
+#if defined(min) || defined(max)
+#error The preprocessor symbols 'min' or 'max' are defined. If you are compiling on Windows, do #define NOMINMAX to prevent windows.h from defining these symbols.
+#endif
+
+// defined in bits/termios.h
+#undef B0
+
 namespace Eigen {

 inline static const char *SimdInstructionSetsInUse(void) {
@@ -235,8 +239,6 @@ inline static const char *SimdInstructionSetsInUse(void) {
 // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
 // ensure QNX/QCC support
 using std::size_t;
-// gcc 4.6.0 wants std:: for ptrdiff_t 
-using std::ptrdiff_t;

 /** \defgroup Core_Module Core module
  * This is the main module of Eigen providing dense matrix and vector support
@@ -248,10 +250,6 @@ using std::ptrdiff_t;
  * \endcode
  */

-/** \defgroup Support_modules Support modules [category]
-  * Category of modules which add support for external libraries.
-  */
-
 #include "src/Core/util/Constants.h"
 #include "src/Core/util/ForwardDeclarations.h"
 #include "src/Core/util/Meta.h"
@@ -323,7 +321,7 @@ using std::ptrdiff_t;
 #include "src/Core/CommaInitializer.h"
 #include "src/Core/Flagged.h"
 #include "src/Core/ProductBase.h"
-#include "src/Core/GeneralProduct.h"
+#include "src/Core/Product.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
 #include "src/Core/SolveTriangular.h"
@@ -352,21 +350,6 @@ using std::ptrdiff_t;
 #include "src/Core/ArrayBase.h"
 #include "src/Core/ArrayWrapper.h"

-#ifdef EIGEN_USE_BLAS
-#include "src/Core/products/GeneralMatrixMatrix_MKL.h"
-#include "src/Core/products/GeneralMatrixVector_MKL.h"
-#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
-#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
-#include "src/Core/products/SelfadjointMatrixVector_MKL.h"
-#include "src/Core/products/TriangularMatrixMatrix_MKL.h"
-#include "src/Core/products/TriangularMatrixVector_MKL.h"
-#include "src/Core/products/TriangularSolverMatrix_MKL.h"
-#endif // EIGEN_USE_BLAS
-
-#ifdef EIGEN_USE_MKL_VML
-#include "src/Core/Assign_MKL.h"
-#endif
-
 } // namespace Eigen

 #include "src/Core/GlobalFunctions.h"
--- a/Eigen/Eigen2Support
+++ b/Eigen/Eigen2Support
@@ -33,8 +33,7 @@

 namespace Eigen {

-/** \ingroup Support_modules
-  * \defgroup Eigen2Support_Module Eigen2 support module
+/** \defgroup Eigen2Support_Module Eigen2 support module
  * This module provides a couple of deprecated functions improving the compatibility with Eigen2.
  *
  * To use it, define EIGEN2_SUPPORT before including any Eigen header
@@ -64,24 +63,6 @@ namespace Eigen {
 // Eigen2 used to include iostream
 #include<iostream>

-#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
-using Eigen::Matrix##SizeSuffix##TypeSuffix; \
-using Eigen::Vector##SizeSuffix##TypeSuffix; \
-using Eigen::RowVector##SizeSuffix##TypeSuffix;
-
-#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
-
-#define EIGEN_USING_MATRIX_TYPEDEFS \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
-EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
-
 #define USING_PART_OF_NAMESPACE_EIGEN \
 EIGEN_USING_MATRIX_TYPEDEFS \
 using Eigen::Matrix; \
--- a/Eigen/Eigenvalues
+++ b/Eigen/Eigenvalues
@@ -9,7 +9,6 @@
 #include "Jacobi"
 #include "Householder"
 #include "LU"
-#include "Geometry"

 namespace Eigen {

@@ -36,11 +35,6 @@ namespace Eigen {
 #include "src/Eigenvalues/ComplexSchur.h"
 #include "src/Eigenvalues/ComplexEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
-#ifdef EIGEN_USE_LAPACKE
-#include "src/Eigenvalues/RealSchur_MKL.h"
-#include "src/Eigenvalues/ComplexSchur_MKL.h"
-#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h"
-#endif

 } // namespace Eigen

--- a/Eigen/IterativeLinearSolvers
+++ b/Eigen/IterativeLinearSolvers
@@ -1,37 +0,0 @@
-#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
-#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-namespace Eigen {
-
-/** \ingroup Sparse_modules
-  * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module
-  *
-  * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse.
-  * Those solvers are accessible via the following classes:
-  *  - ConjugateGradient for selfadjoint (hermitian) matrices,
-  *  - BiCGSTAB for general square matrices.
-  *
-  * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
-  *
-  * \code
-  * #include <Eigen/IterativeLinearSolvers>
-  * \endcode
-  */
-
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
-#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
-#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
-#include "src/IterativeLinearSolvers/ConjugateGradient.h"
-#include "src/IterativeLinearSolvers/BiCGSTAB.h"
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -23,9 +23,6 @@ namespace Eigen {
 #include "src/misc/Image.h"
 #include "src/LU/FullPivLU.h"
 #include "src/LU/PartialPivLU.h"
-#ifdef EIGEN_USE_LAPACKE
-#include "src/LU/PartialPivLU_MKL.h"
-#endif
 #include "src/LU/Determinant.h"
 #include "src/LU/Inverse.h"

--- a/Eigen/OrderingMethods
+++ b/Eigen/OrderingMethods
@@ -1,27 +0,0 @@
-#ifndef EIGEN_ORDERINGMETHODS_MODULE_H
-#define EIGEN_ORDERINGMETHODS_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-namespace Eigen {
-
-/** \ingroup Sparse_modules
-  * \defgroup OrderingMethods_Module OrderingMethods module
-  *
-  * This module is currently for internal use only.
-  *
-  *
-  * \code
-  * #include <Eigen/OrderingMethods>
-  * \endcode
-  */
-
-#include "src/OrderingMethods/Amd.h"
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_ORDERINGMETHODS_MODULE_H
--- a/Eigen/PARDISOSupport
+++ b/Eigen/PARDISOSupport
@@ -1,30 +0,0 @@
-#ifndef EIGEN_PARDISOSUPPORT_MODULE_H
-#define EIGEN_PARDISOSUPPORT_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-#include <mkl_pardiso.h>
-
-#include <unsupported/Eigen/SparseExtra>
-
-namespace Eigen {
-
-/** \ingroup Support_modules
-  * \defgroup PARDISOSupport_Module PARDISOSupport module
-  *
-  * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers
-  *
-  * \code
-  * #include <Eigen/PARDISOSupport>
-  * \endcode
-  */
-
-#include "src/PARDISOSupport/PARDISOSupport.h"
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_PARDISOSUPPORT_MODULE_H
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -28,10 +28,6 @@ namespace Eigen {
 #include "src/QR/HouseholderQR.h"
 #include "src/QR/FullPivHouseholderQR.h"
 #include "src/QR/ColPivHouseholderQR.h"
-#ifdef EIGEN_USE_LAPACKE
-#include "src/QR/HouseholderQR_MKL.h"
-#include "src/QR/ColPivHouseholderQR_MKL.h"
-#endif

 #ifdef EIGEN2_SUPPORT
 #include "src/Eigen2Support/QR.h"
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -13,9 +13,9 @@ namespace Eigen {
  *
  *
  *
-  * This module provides SVD decomposition for matrices (both real and complex).
+  * This module provides SVD decomposition for (currently) real matrices.
  * This decomposition is accessible via the following MatrixBase method:
-  *  - MatrixBase::jacobiSvd()
+  *  - MatrixBase::svd()
  *
  * \code
  * #include <Eigen/SVD>
@@ -24,9 +24,6 @@ namespace Eigen {

 #include "src/misc/Solve.h"
 #include "src/SVD/JacobiSVD.h"
-#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
-#include "src/SVD/JacobiSVD_MKL.h"
-#endif
 #include "src/SVD/UpperBidiagonalization.h"

 #ifdef EIGEN2_SUPPORT
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -1,27 +1,69 @@
 #ifndef EIGEN_SPARSE_MODULE_H
 #define EIGEN_SPARSE_MODULE_H

+#include "Core"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+#include <vector>
+#include <map>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+
+#ifdef EIGEN2_SUPPORT
+#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
+#endif
+
+#ifndef EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET
+#error The sparse module API is not stable yet. To use it anyway, please define the EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET preprocessor token.
+#endif
+
 namespace Eigen {

-/** \defgroup Sparse_modules Sparse modules
+/** \defgroup Sparse_Module Sparse module
  *
-  * Meta-module including all related modules:
-  * - SparseCore
-  * - OrderingMethods
-  * - SparseCholesky
-  * - IterativeLinearSolvers
+  *
+  *
+  * See the \ref TutorialSparse "Sparse tutorial"
  *
  * \code
  * #include <Eigen/Sparse>
  * \endcode
  */

+/** The type used to identify a general sparse storage. */
+struct Sparse {};
+
+#include "src/Sparse/SparseUtil.h"
+#include "src/Sparse/SparseMatrixBase.h"
+#include "src/Sparse/CompressedStorage.h"
+#include "src/Sparse/AmbiVector.h"
+#include "src/Sparse/SparseMatrix.h"
+#include "src/Sparse/DynamicSparseMatrix.h"
+#include "src/Sparse/MappedSparseMatrix.h"
+#include "src/Sparse/SparseVector.h"
+#include "src/Sparse/CoreIterators.h"
+#include "src/Sparse/SparseBlock.h"
+#include "src/Sparse/SparseTranspose.h"
+#include "src/Sparse/SparseCwiseUnaryOp.h"
+#include "src/Sparse/SparseCwiseBinaryOp.h"
+#include "src/Sparse/SparseDot.h"
+#include "src/Sparse/SparseAssign.h"
+#include "src/Sparse/SparseRedux.h"
+#include "src/Sparse/SparseFuzzy.h"
+#include "src/Sparse/SparseProduct.h"
+#include "src/Sparse/SparseSparseProduct.h"
+#include "src/Sparse/SparseDenseProduct.h"
+#include "src/Sparse/SparseDiagonalProduct.h"
+#include "src/Sparse/SparseTriangularView.h"
+#include "src/Sparse/SparseSelfAdjointView.h"
+#include "src/Sparse/TriangularSolver.h"
+#include "src/Sparse/SparseView.h"
+
 } // namespace Eigen

-#include "SparseCore"
-#include "OrderingMethods"
-#include "SparseCholesky"
-#include "IterativeLinearSolvers"
+#include "src/Core/util/ReenableStupidWarnings.h"

 #endif // EIGEN_SPARSE_MODULE_H

--- a/Eigen/SparseCholesky
+++ b/Eigen/SparseCholesky
@@ -1,34 +0,0 @@
-#ifndef EIGEN_SPARSECHOLESKY_MODULE_H
-#define EIGEN_SPARSECHOLESKY_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-namespace Eigen {
-
-/** \ingroup Sparse_modules
-  * \defgroup SparseCholesky_Module SparseCholesky module
-  *
-  * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.
-  * Those decompositions are accessible via the following classes:
-  *  - SimplicialLLt,
-  *  - SimplicialLDLt
-  *
-  * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module.
-  *
-  * \code
-  * #include <Eigen/SparseCholesky>
-  * \endcode
-  */
-
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
-#include "src/SparseCholesky/SimplicialCholesky.h"
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_SPARSECHOLESKY_MODULE_H
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -1,65 +0,0 @@
-#ifndef EIGEN_SPARSECORE_MODULE_H
-#define EIGEN_SPARSECORE_MODULE_H
-
-#include "Core"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-#include <vector>
-#include <map>
-#include <cstdlib>
-#include <cstring>
-#include <algorithm>
-
-namespace Eigen {
-
-/** \ingroup Sparse_modules
-  * \defgroup SparseCore_Module SparseCore module
-  *
-  * This module provides a sparse matrix representation, and basic associatd matrix manipulations
-  * and operations.
-  *
-  * See the \ref TutorialSparse "Sparse tutorial"
-  *
-  * \code
-  * #include <Eigen/SparseCore>
-  * \endcode
-  *
-  * This module depends on: Core.
-  */
-
-/** The type used to identify a general sparse storage. */
-struct Sparse {};
-
-#include "src/SparseCore/SparseUtil.h"
-#include "src/SparseCore/SparseMatrixBase.h"
-#include "src/SparseCore/CompressedStorage.h"
-#include "src/SparseCore/AmbiVector.h"
-#include "src/SparseCore/SparseMatrix.h"
-#include "src/SparseCore/MappedSparseMatrix.h"
-#include "src/SparseCore/SparseVector.h"
-#include "src/SparseCore/CoreIterators.h"
-#include "src/SparseCore/SparseBlock.h"
-#include "src/SparseCore/SparseTranspose.h"
-#include "src/SparseCore/SparseCwiseUnaryOp.h"
-#include "src/SparseCore/SparseCwiseBinaryOp.h"
-#include "src/SparseCore/SparseDot.h"
-#include "src/SparseCore/SparseAssign.h"
-#include "src/SparseCore/SparseRedux.h"
-#include "src/SparseCore/SparseFuzzy.h"
-#include "src/SparseCore/ConservativeSparseSparseProduct.h"
-#include "src/SparseCore/SparseSparseProductWithPruning.h"
-#include "src/SparseCore/SparseProduct.h"
-#include "src/SparseCore/SparseDenseProduct.h"
-#include "src/SparseCore/SparseDiagonalProduct.h"
-#include "src/SparseCore/SparseTriangularView.h"
-#include "src/SparseCore/SparseSelfAdjointView.h"
-#include "src/SparseCore/TriangularSolver.h"
-#include "src/SparseCore/SparseView.h"
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_SPARSECORE_MODULE_H
-
--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -1,53 +0,0 @@
-#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H
-#define EIGEN_SUPERLUSUPPORT_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-#ifdef EMPTY
-#define EIGEN_EMPTY_WAS_ALREADY_DEFINED
-#endif
-
-typedef int int_t;
-#include <slu_Cnames.h>
-#include <supermatrix.h>
-#include <slu_util.h>
-
-// slu_util.h defines a preprocessor token named EMPTY which is really polluting,
-// so we remove it in favor of a SUPERLU_EMPTY token.
-// If EMPTY was already, defined then we don't undef it.
-
-#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED)
-# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED
-#elif defined(EMPTY)
-# undef EMPTY
-#endif
-
-#define SUPERLU_EMPTY (-1)
-
-namespace Eigen { struct SluMatrix; }
-
-namespace Eigen {
-
-/** \ingroup Support_modules
-  * \defgroup SuperLUSupport_Module SuperLUSupport module
-  *
-  * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting.
-  *
-  * \code
-  * #include <Eigen/SuperLUSupport>
-  * \endcode
-  */
-
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
-#include "src/SuperLUSupport/SuperLUSupport.h"
-
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_SUPERLUSUPPORT_MODULE_H
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -1,34 +0,0 @@
-#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H
-#define EIGEN_UMFPACKSUPPORT_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-extern "C" {
-#include <umfpack.h>
-}
-
-namespace Eigen {
-
-/** \ingroup Support_modules
-  * \defgroup UmfPackSupport_Module UmfPackSupport module
-  *
-  *
-  *
-  *
-  * \code
-  * #include <Eigen/UmfPackSupport>
-  * \endcode
-  */
-
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
-#include "src/UmfPackSupport/UmfPackSupport.h"
-
-} // namespace Eigen
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_UMFPACKSUPPORT_MODULE_H
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -1,10 +1,9 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -32,15 +31,13 @@ namespace internal {
 template<typename MatrixType, int UpLo> struct LDLT_Traits;
 }

-/** \ingroup Cholesky_Module
+/** \ingroup cholesky_Module
  *
  * \class LDLT
  *
  * \brief Robust Cholesky decomposition of a matrix with pivoting
  *
  * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
-  * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
-  *             The other triangular part won't be read.
  *
  * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
  * matrix \f$ A \f$ such that \f$ A =  P^TLDL^*P \f$, where P is a permutation matrix, L
@@ -51,10 +48,14 @@ template<typename MatrixType, int UpLo> struct LDLT_Traits;
  * on D also stabilizes the computation.
  *
  * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
-  * decomposition to determine whether a system of equations has a solution.
+	* decomposition to determine whether a system of equations has a solution.
  *
  * \sa MatrixBase::ldlt(), class LLT
  */
+ /* THIS PART OF THE DOX IS CURRENTLY DISABLED BECAUSE INACCURATE BECAUSE OF BUG IN THE DECOMPOSITION CODE
+  * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
+  * the strict lower part does not have to store correct values.
+  */
 template<typename _MatrixType, int _UpLo> class LDLT
 {
  public:
@@ -97,11 +98,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
        m_isInitialized(false)
    {}

-    /** \brief Constructor with decomposition
-      *
-      * This calculates the decomposition for the input \a matrix.
-      * \sa LDLT(Index size)
-      */
    LDLT(const MatrixType& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
        m_transpositions(matrix.rows()),
@@ -111,14 +107,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
      compute(matrix);
    }

-    /** Clear any existing decomposition
-     * \sa rankUpdate(w,sigma)
-     */
-    void setZero()
-    {
-      m_isInitialized = false;
-    }
-
    /** \returns a view of the upper triangular matrix U */
    inline typename Traits::MatrixU matrixU() const
    {
@@ -142,14 +130,14 @@ template<typename _MatrixType, int _UpLo> class LDLT
    }

    /** \returns the coefficients of the diagonal matrix D */
-    inline Diagonal<const MatrixType> vectorD() const
+    inline Diagonal<const MatrixType> vectorD(void) const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      return m_matrix.diagonal();
    }

    /** \returns true if the matrix is positive (semidefinite) */
-    inline bool isPositive() const
+    inline bool isPositive(void) const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      return m_sign == 1;
@@ -170,19 +158,10 @@ template<typename _MatrixType, int _UpLo> class LDLT
    }

    /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .
      *
      * \note_about_checking_solutions
      *
-      * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
-      * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, 
-      * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
-      * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
-      * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
-      * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
-      *
-      * \sa MatrixBase::ldlt()
+      * \sa solveInPlace(), MatrixBase::ldlt()
      */
    template<typename Rhs>
    inline const internal::solve_retval<LDLT, Rhs>
@@ -208,9 +187,6 @@ template<typename _MatrixType, int _UpLo> class LDLT

    LDLT& compute(const MatrixType& matrix);

-    template <typename Derived>
-    LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
-
    /** \returns the internal LDLT decomposition matrix
      *
      * TODO: document the storage layout
@@ -226,17 +202,6 @@ template<typename _MatrixType, int _UpLo> class LDLT
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }

-    /** \brief Reports whether previous computation was successful.
-      *
-      * \returns \c Success if computation was succesful,
-      *          \c NumericalIssue if the matrix.appears to be negative.
-      */
-    ComputationInfo info() const
-    {
-      eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return Success;
-    }
-
  protected:

    /** \internal
@@ -275,7 +240,7 @@ template<> struct ldlt_inplace<Lower>
      return true;
    }

-    RealScalar cutoff(0), biggest_in_corner;
+    RealScalar cutoff = 0, biggest_in_corner;

    for (Index k = 0; k < size; ++k)
    {
@@ -343,61 +308,6 @@ template<> struct ldlt_inplace<Lower>

    return true;
  }
-
-  // Reference for the algorithm: Davis and Hager, "Multiple Rank
-  // Modifications of a Sparse Cholesky Factorization" (Algorithm 1)
-  // Trivial rearrangements of their computations (Timothy E. Holy)
-  // allow their algorithm to work for rank-1 updates even if the
-  // original matrix is not of full rank.
-  // Here only rank-1 updates are implemented, to reduce the
-  // requirement for intermediate storage and improve accuracy
-  template<typename MatrixType, typename WDerived>
-  static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
-  {
-    using internal::isfinite;
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename MatrixType::RealScalar RealScalar;
-    typedef typename MatrixType::Index Index;
-
-    const Index size = mat.rows();
-    eigen_assert(mat.cols() == size && w.size()==size);
-
-    RealScalar alpha = 1;
-
-    // Apply the update
-    for (Index j = 0; j < size; j++)
-    {
-      // Check for termination due to an original decomposition of low-rank
-      if (!isfinite(alpha))
-        break;
-
-      // Update the diagonal terms
-      RealScalar dj = real(mat.coeff(j,j));
-      Scalar wj = w.coeff(j);
-      RealScalar swj2 = sigma*abs2(wj);
-      RealScalar gamma = dj*alpha + swj2;
-
-      mat.coeffRef(j,j) += swj2/alpha;
-      alpha += swj2/dj;
-
-
-      // Update the terms of L
-      Index rs = size-j-1;
-      w.tail(rs) -= wj * mat.col(j).tail(rs);
-      if(gamma != 0)
-        mat.col(j).tail(rs) += (sigma*conj(wj)/gamma)*w.tail(rs);
-    }
-    return true;
-  }
-
-  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
-  static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
-  {
-    // Apply the permutation to the input w
-    tmp = transpositions * w;
-
-    return ldlt_inplace<Lower>::updateInPlace(mat,tmp,sigma);
-  }
 };

 template<> struct ldlt_inplace<Upper>
@@ -408,29 +318,22 @@ template<> struct ldlt_inplace<Upper>
    Transpose<MatrixType> matt(mat);
    return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
  }
-
-  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
-  static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
-  {
-    Transpose<MatrixType> matt(mat);
-    return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
-  }
 };

 template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
 {
-  typedef TriangularView<const MatrixType, UnitLower> MatrixL;
-  typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m; }
-  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+  typedef TriangularView<MatrixType, UnitLower> MatrixL;
+  typedef TriangularView<typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
+  inline static MatrixL getL(const MatrixType& m) { return m; }
+  inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
 };

 template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
 {
-  typedef TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
-  typedef TriangularView<const MatrixType, UnitUpper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
-  static inline MatrixU getU(const MatrixType& m) { return m; }
+  typedef TriangularView<typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
+  typedef TriangularView<MatrixType, UnitUpper> MatrixU;
+  inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
+  inline static MatrixU getU(const MatrixType& m) { return m; }
 };

 } // end namespace internal
@@ -455,37 +358,6 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  return *this;
 }

-/** Update the LDLT decomposition:  given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.
- * \param w a vector to be incorporated into the decomposition.
- * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
- * \sa setZero()
-  */
-template<typename MatrixType, int _UpLo>
-template<typename Derived>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
-{
-  const Index size = w.rows();
-  if (m_isInitialized)
-  {
-    eigen_assert(m_matrix.rows()==size);
-  }
-  else
-  {    
-    m_matrix.resize(size,size);
-    m_matrix.setZero();
-    m_transpositions.resize(size);
-    for (Index i = 0; i < size; i++)
-      m_transpositions.coeffRef(i) = i;
-    m_temporary.resize(size);
-    m_sign = sigma;
-    m_isInitialized = true;
-  }
-
-  internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);
-
-  return *this;
-}
-
 namespace internal {
 template<typename _MatrixType, int _UpLo, typename Rhs>
 struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
@@ -504,21 +376,7 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
    dec().matrixL().solveInPlace(dst);

    // dst = D^-1 (L^-1 P b)
-    // more precisely, use pseudo-inverse of D (see bug 241)
-    using std::abs;
-    using std::max;
-    typedef typename LDLTType::MatrixType MatrixType;
-    typedef typename LDLTType::Scalar Scalar;
-    typedef typename LDLTType::RealScalar RealScalar;
-    const Diagonal<const MatrixType> vectorD = dec().vectorD();
-    RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
-				 RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
-    for (Index i = 0; i < vectorD.size(); ++i) {
-      if(abs(vectorD(i)) > tolerance)
-	dst.row(i) /= vectorD(i);
-      else
-	dst.row(i).setZero();
-    }
+    dst = dec().vectorD().asDiagonal().inverse() * dst;

    // dst = L^-T (D^-1 L^-1 P b)
    dec().matrixU().solveInPlace(dst);
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -29,15 +29,13 @@ namespace internal{
 template<typename MatrixType, int UpLo> struct LLT_Traits;
 }

-/** \ingroup Cholesky_Module
+/** \ingroup cholesky_Module
  *
  * \class LLT
  *
  * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
  *
  * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
-  * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
-  *             The other triangular part won't be read.
  *
  * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
  * matrix A such that A = LL^* = U^*U, where L is lower triangular.
@@ -51,9 +49,6 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
  * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
  * has a solution.
  *
-  * Example: \include LLT_example.cpp
-  * Output: \verbinclude LLT_example.out
-  *    
  * \sa MatrixBase::llt(), class LDLT
  */
 /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
@@ -183,9 +178,6 @@ template<typename _MatrixType, int _UpLo> class LLT
    inline Index rows() const { return m_matrix.rows(); }
    inline Index cols() const { return m_matrix.cols(); }

-    template<typename VectorType>
-    LLT& rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
-
  protected:
    /** \internal
      * Used to compute and store L
@@ -198,15 +190,16 @@ template<typename _MatrixType, int _UpLo> class LLT

 namespace internal {

-template<typename Scalar, int UpLo> struct llt_inplace;
+template<int UpLo> struct llt_inplace;

-template<typename Scalar> struct llt_inplace<Scalar, Lower>
+template<> struct llt_inplace<Lower>
 {
-  typedef typename NumTraits<Scalar>::Real RealScalar;
  template<typename MatrixType>
  static typename MatrixType::Index unblocked(MatrixType& mat)
  {
    typedef typename MatrixType::Index Index;
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
    
    eigen_assert(mat.rows()==mat.cols());
    const Index size = mat.rows();
@@ -240,7 +233,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>

    Index blockSize = size/8;
    blockSize = (blockSize/16)*16;
-    blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));
+    blockSize = std::min(std::max(blockSize,Index(8)), Index(128));

    for (Index k=0; k<size; k+=blockSize)
    {
@@ -248,7 +241,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
      //       A00 |  -  |  -
      // lu  = A10 | A11 |  -
      //       A20 | A21 | A22
-      Index bs = (std::min)(blockSize, size-k);
+      Index bs = std::min(blockSize, size-k);
      Index rs = size - k - bs;
      Block<MatrixType,Dynamic,Dynamic> A11(m,k,   k,   bs,bs);
      Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k,   rs,bs);
@@ -261,133 +254,55 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
    }
    return -1;
  }
-
-  template<typename MatrixType, typename VectorType>
-  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
-  {
-    typedef typename MatrixType::Index Index;
-    typedef typename MatrixType::ColXpr ColXpr;
-    typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
-    typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
-    typedef Matrix<Scalar,Dynamic,1> TempVectorType;
-    typedef typename TempVectorType::SegmentReturnType TempVecSegment;
-
-    int n = mat.cols();
-    eigen_assert(mat.rows()==n && vec.size()==n);
-
-    TempVectorType temp;
-
-    if(sigma>0)
-    {
-      // This version is based on Givens rotations.
-      // It is faster than the other one below, but only works for updates,
-      // i.e., for sigma > 0
-      temp = sqrt(sigma) * vec;
-
-      for(int i=0; i<n; ++i)
-      {
-        JacobiRotation<Scalar> g;
-        g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
-
-        int rs = n-i-1;
-        if(rs>0)
-        {
-          ColXprSegment x(mat.col(i).tail(rs));
-          TempVecSegment y(temp.tail(rs));
-          apply_rotation_in_the_plane(x, y, g);
-        }
-      }
-    }
-    else
-    {
-      temp = vec;
-      RealScalar beta = 1;
-      for(int j=0; j<n; ++j)
-      {
-        RealScalar Ljj = real(mat.coeff(j,j));
-        RealScalar dj = abs2(Ljj);
-        Scalar wj = temp.coeff(j);
-        RealScalar swj2 = sigma*abs2(wj);
-        RealScalar gamma = dj*beta + swj2;
-
-        RealScalar x = dj + swj2/beta;
-        if (x<=RealScalar(0))
-          return j;
-        RealScalar nLjj = sqrt(x);
-        mat.coeffRef(j,j) = nLjj;
-        beta += swj2/dj;
-
-        // Update the terms of L
-        Index rs = n-j-1;
-        if(rs)
-        {
-          temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
-          if(gamma != 0)
-            mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*conj(wj)/gamma)*temp.tail(rs);
-        }
-      }
-    }
-    return -1;
-  }
 };

-template<typename Scalar> struct llt_inplace<Scalar, Upper>
+template<> struct llt_inplace<Upper>
 {
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-
  template<typename MatrixType>
  static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
  {
    Transpose<MatrixType> matt(mat);
-    return llt_inplace<Scalar, Lower>::unblocked(matt);
+    return llt_inplace<Lower>::unblocked(matt);
  }
  template<typename MatrixType>
  static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
  {
    Transpose<MatrixType> matt(mat);
-    return llt_inplace<Scalar, Lower>::blocked(matt);
-  }
-  template<typename MatrixType, typename VectorType>
-  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
-  {
-    Transpose<MatrixType> matt(mat);
-    return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
+    return llt_inplace<Lower>::blocked(matt);
  }
 };

 template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
 {
-  typedef TriangularView<const MatrixType, Lower> MatrixL;
-  typedef TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m; }
-  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
+  typedef TriangularView<MatrixType, Lower> MatrixL;
+  typedef TriangularView<typename MatrixType::AdjointReturnType, Upper> MatrixU;
+  inline static MatrixL getL(const MatrixType& m) { return m; }
+  inline static MatrixU getU(const MatrixType& m) { return m.adjoint(); }
  static bool inplace_decomposition(MatrixType& m)
-  { return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
+  { return llt_inplace<Lower>::blocked(m)==-1; }
 };

 template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
 {
-  typedef TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
-  typedef TriangularView<const MatrixType, Upper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
-  static inline MatrixU getU(const MatrixType& m) { return m; }
+  typedef TriangularView<typename MatrixType::AdjointReturnType, Lower> MatrixL;
+  typedef TriangularView<MatrixType, Upper> MatrixU;
+  inline static MatrixL getL(const MatrixType& m) { return m.adjoint(); }
+  inline static MatrixU getU(const MatrixType& m) { return m; }
  static bool inplace_decomposition(MatrixType& m)
-  { return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
+  { return llt_inplace<Upper>::blocked(m)==-1; }
 };

 } // end namespace internal

 /** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
  *
-  * \returns a reference to *this
  *
-  * Example: \include TutorialLinAlgComputeTwice.cpp
-  * Output: \verbinclude TutorialLinAlgComputeTwice.out
+  * \returns a reference to *this
  */
 template<typename MatrixType, int _UpLo>
 LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
 {
-  eigen_assert(a.rows()==a.cols());
+  assert(a.rows()==a.cols());
  const Index size = a.rows();
  m_matrix.resize(size, size);
  m_matrix = a;
@@ -399,26 +314,6 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  return *this;
 }

-/** Performs a rank one update (or dowdate) of the current decomposition.
-  * If A = LL^* before the rank one update,
-  * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector
-  * of same dimension.
-  */
-template<typename MatrixType, int _UpLo>
-template<typename VectorType>
-LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
-{
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
-  eigen_assert(v.size()==m_matrix.cols());
-  eigen_assert(m_isInitialized);
-  if(internal::llt_inplace<typename MatrixType::Scalar, UpLo>::rankUpdate(m_matrix,v,sigma)>=0)
-    m_info = NumericalIssue;
-  else
-    m_info = Success;
-
-  return *this;
-}
-    
 namespace internal {
 template<typename _MatrixType, int UpLo, typename Rhs>
 struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
@@ -489,4 +384,3 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
 }

 #endif // EIGEN_LLT_H
-
--- a/Eigen/src/Cholesky/LLT_MKL.h
+++ b/Eigen/src/Cholesky/LLT_MKL.h
@@ -1,123 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *     LLt decomposition based on LAPACKE_?potrf function.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_LLT_MKL_H
-#define EIGEN_LLT_MKL_H
-
-#include "Eigen/src/Core/util/MKL_support.h"
-#include <iostream>
-
-namespace internal {
-
-template<typename Scalar> struct mkl_llt;
-
-#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
-template<> struct mkl_llt<EIGTYPE> \
-{ \
-  template<typename MatrixType> \
-  static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
-  { \
-    lapack_int matrix_order; \
-    lapack_int size, lda, info, StorageOrder; \
-    EIGTYPE* a; \
-    eigen_assert(m.rows()==m.cols()); \
-/* Set up parameters for ?potrf */ \
-    size = m.rows(); \
-    StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
-    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
-    a = &(m.coeffRef(0,0)); \
-    lda = m.outerStride(); \
-\
-    info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
-    info = (info==0) ? Success : NumericalIssue; \
-    return info; \
-  } \
-}; \
-template<> struct llt_inplace<EIGTYPE, Lower> \
-{ \
-  template<typename MatrixType> \
-  static typename MatrixType::Index blocked(MatrixType& m) \
-  { \
-    return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
-  } \
-  template<typename MatrixType, typename VectorType> \
-  static void rankUpdate(MatrixType& mat, const VectorType& vec) \
-  { \
-    typedef typename MatrixType::ColXpr ColXpr; \
-    typedef typename internal::remove_all<ColXpr>::type ColXprCleaned; \
-    typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; \
-    typedef typename MatrixType::Scalar Scalar; \
-    typedef Matrix<Scalar,Dynamic,1> TempVectorType; \
-    typedef typename TempVectorType::SegmentReturnType TempVecSegment; \
-\
-    int n = mat.cols(); \
-    eigen_assert(mat.rows()==n && vec.size()==n); \
-    TempVectorType temp(vec); \
-\
-    for(int i=0; i<n; ++i) \
-    { \
-      JacobiRotation<Scalar> g; \
-      g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); \
-\
-      int rs = n-i-1; \
-      if(rs>0) \
-      { \
-        ColXprSegment x(mat.col(i).tail(rs)); \
-        TempVecSegment y(temp.tail(rs)); \
-        apply_rotation_in_the_plane(x, y, g); \
-      } \
-    } \
-  } \
-}; \
-template<> struct llt_inplace<EIGTYPE, Upper> \
-{ \
-  template<typename MatrixType> \
-  static typename MatrixType::Index blocked(MatrixType& m) \
-  { \
-    return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
-  } \
-  template<typename MatrixType, typename VectorType> \
-  static void rankUpdate(MatrixType& mat, const VectorType& vec) \
-  { \
-    Transpose<MatrixType> matt(mat); \
-    return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate()); \
-  } \
-};
-
-EIGEN_MKL_LLT(double, double, d)
-EIGEN_MKL_LLT(float, float, s)
-EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
-
-}
-
-#endif // EIGEN_LLT_MKL_H
--- a/Eigen/src/CholmodSupport/CMakeLists.txt
+++ b/Eigen/src/CholmodSupport/CMakeLists.txt
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_CholmodSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
-  )
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -68,8 +68,10 @@ class Array
    friend struct internal::conservative_resize_like_impl;

    using Base::m_storage;
-
  public:
+    enum { NeedsToAlign = (!(Options&DontAlign))
+                          && SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
+    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

    using Base::base;
    using Base::coeff;
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -159,7 +159,7 @@ template<typename Derived> class ArrayBase
    /** \returns an \link MatrixBase Matrix \endlink expression of this array
      * \sa MatrixBase::array() */
    MatrixWrapper<Derived> matrix() { return derived(); }
-    const MatrixWrapper<const Derived> matrix() const { return derived(); }
+    const MatrixWrapper<Derived> matrix() const { return derived(); }

 //     template<typename Dest>
 //     inline void evalTo(Dest& dst) const { dst = matrix(); }
@@ -174,10 +174,10 @@ template<typename Derived> class ArrayBase
  protected:
    // mixing arrays and matrices is not legal
    template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
-    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+    {EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
    // mixing arrays and matrices is not legal
    template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
-    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+    {EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
 };

 /** replaces \c *this by \c *this - \a other.
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -53,25 +53,16 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)

-    typedef typename internal::conditional<
-                       internal::is_lvalue<ExpressionType>::value,
-                       Scalar,
-                       const Scalar
-                     >::type ScalarWithConstIfNotLvalue;
-
    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;

-    inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+    inline ArrayWrapper(const ExpressionType& matrix) : m_expression(matrix) {}

    inline Index rows() const { return m_expression.rows(); }
    inline Index cols() const { return m_expression.cols(); }
    inline Index outerStride() const { return m_expression.outerStride(); }
    inline Index innerStride() const { return m_expression.innerStride(); }

-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
-    inline const Scalar* data() const { return m_expression.data(); }
-
-    inline CoeffReturnType coeff(Index row, Index col) const
+    inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
    }
@@ -86,7 +77,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    inline CoeffReturnType coeff(Index index) const
+    inline const CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }
@@ -128,14 +119,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    template<typename Dest>
    inline void evalTo(Dest& dst) const { dst = m_expression; }

-    const typename internal::remove_all<NestedExpressionType>::type& 
-    nestedExpression() const 
-    {
-      return m_expression;
-    }
-
  protected:
-    NestedExpressionType m_expression;
+    const NestedExpressionType m_expression;
 };

 /** \class MatrixWrapper
@@ -166,25 +151,16 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)

-    typedef typename internal::conditional<
-                       internal::is_lvalue<ExpressionType>::value,
-                       Scalar,
-                       const Scalar
-                     >::type ScalarWithConstIfNotLvalue;
-
    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;

-    inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+    inline MatrixWrapper(const ExpressionType& matrix) : m_expression(matrix) {}

    inline Index rows() const { return m_expression.rows(); }
    inline Index cols() const { return m_expression.cols(); }
    inline Index outerStride() const { return m_expression.outerStride(); }
    inline Index innerStride() const { return m_expression.innerStride(); }

-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
-    inline const Scalar* data() const { return m_expression.data(); }
-
-    inline CoeffReturnType coeff(Index row, Index col) const
+    inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
    }
@@ -199,7 +175,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
      return m_expression.derived().coeffRef(row, col);
    }

-    inline CoeffReturnType coeff(Index index) const
+    inline const CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }
@@ -238,14 +214,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
    }

-    const typename internal::remove_all<NestedExpressionType>::type& 
-    nestedExpression() const 
-    {
-      return m_expression;
-    }
-
  protected:
-    NestedExpressionType m_expression;
+    const NestedExpressionType m_expression;
 };

 #endif // EIGEN_ARRAYWRAPPER_H
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -152,7 +152,7 @@ struct assign_DefaultTraversal_CompleteUnrolling
    inner = Index % Derived1::InnerSizeAtCompileTime
  };

-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.copyCoeffByOuterInner(outer, inner, src);
    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -162,13 +162,13 @@ struct assign_DefaultTraversal_CompleteUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
 };

 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_DefaultTraversal_InnerUnrolling
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
  {
    dst.copyCoeffByOuterInner(outer, Index, src);
    assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -178,7 +178,7 @@ struct assign_DefaultTraversal_InnerUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
 };

 /***********************
@@ -188,7 +188,7 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_LinearTraversal_CompleteUnrolling
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.copyCoeff(Index, src);
    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -198,7 +198,7 @@ struct assign_LinearTraversal_CompleteUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
 };

 /**************************
@@ -214,7 +214,7 @@ struct assign_innervec_CompleteUnrolling
    JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
  };

-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
    assign_innervec_CompleteUnrolling<Derived1, Derived2,
@@ -225,13 +225,13 @@ struct assign_innervec_CompleteUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
 };

 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_innervec_InnerUnrolling
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
  {
    dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
    assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -242,7 +242,7 @@ struct assign_innervec_InnerUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
 };

 /***************************************************************************
@@ -251,25 +251,24 @@ struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>

 template<typename Derived1, typename Derived2,
         int Traversal = assign_traits<Derived1, Derived2>::Traversal,
-         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
-         int Version = Specialized>
+         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling>
 struct assign_impl;

 /************************
 *** Default traversal ***
 ************************/

-template<typename Derived1, typename Derived2, int Unrolling, int Version>
-struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
+template<typename Derived1, typename Derived2, int Unrolling>
+struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling>
 {
-  static inline void run(Derived1 &, const Derived2 &) { }
+  inline static void run(Derived1 &, const Derived2 &) { }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
@@ -279,21 +278,21 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
  }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
@@ -306,11 +305,11 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version
 *** Linear traversal ***
 ***********************/

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const Index size = dst.size();
    for(Index i = 0; i < size; ++i)
@@ -318,10 +317,10 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
  }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
@@ -332,11 +331,11 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Versi
 *** Inner vectorization ***
 **************************/

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const Index innerSize = dst.innerSize();
    const Index outerSize = dst.outerSize();
@@ -347,21 +346,21 @@ struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Ve
  }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
 {
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const Index outerSize = dst.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
@@ -399,11 +398,11 @@ struct unaligned_assign_impl<false>
  }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const Index size = dst.size();
    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
@@ -413,7 +412,7 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, V
      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
    };
    const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                             : internal::first_aligned(&dst.coeffRef(0), size);
+                             : first_aligned(&dst.coeffRef(0), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
@@ -427,11 +426,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, V
  }
 };

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    enum { size = Derived1::SizeAtCompileTime,
           packetSize = packet_traits<typename Derived1::Scalar>::size,
@@ -446,11 +445,11 @@ struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnroll
 *** Slice vectorization ***
 ***************************/

-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
+template<typename Derived1, typename Derived2>
+struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
    enum {
@@ -464,7 +463,7 @@ struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Ve
    const Index outerSize = dst.outerSize();
    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
-                       : internal::first_aligned(&dst.coeffRef(0,0), innerSize);
+                       : first_aligned(&dst.coeffRef(0,0), innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
@@ -532,19 +531,19 @@ struct assign_selector;

 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,false,false> {
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,true,false> {
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,false,true> {
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,true,true> {
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
 };

 } // end namespace internal
--- a/Eigen/src/Core/Assign_MKL.h
+++ b/Eigen/src/Core/Assign_MKL.h
@@ -1,217 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
- ********************************************************************************
-*/
-
-#ifndef EIGEN_ASSIGN_VML_H
-#define EIGEN_ASSIGN_VML_H
-
-namespace internal {
-
-template<typename Op> struct vml_call
-{ enum { IsSupported = 0 }; };
-
-template<typename Dst, typename Src, typename UnaryOp>
-class vml_assign_traits
-{
-  private:
-    enum {
-      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
-      SrcHasDirectAccess = Src::Flags & DirectAccessBit,
-
-      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
-      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
-                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
-                : int(Dst::RowsAtCompileTime),
-      InnerMaxSize  = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
-                    : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
-                    : int(Dst::MaxRowsAtCompileTime),
-      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
-
-      MightEnableVml =  vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
-                     && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
-      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
-      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
-      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
-      MayEnableVml = MightEnableVml && LargeEnough,
-      MayLinearize = MayEnableVml && MightLinearize
-    };
-  public:
-    enum {
-      Traversal = MayLinearize ? LinearVectorizedTraversal
-                : MayEnableVml ? InnerVectorizedTraversal
-                : DefaultTraversal
-    };
-};
-
-template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
-         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
-struct vml_assign_impl
-  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
-{
-};
-
-template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
-struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
-{
-  typedef typename Derived1::Scalar Scalar;
-  typedef typename Derived1::Index Index;
-  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
-  {
-    // in case we want to (or have to) skip VML at runtime we can call:
-    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
-    const Index innerSize = dst.innerSize();
-    const Index outerSize = dst.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer) {
-      const Scalar *src_ptr = src.IsRowMajor ?  &(src.nestedExpression().coeffRef(outer,0)) :
-                                                &(src.nestedExpression().coeffRef(0, outer));
-      Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
-      vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
-    }
-  }
-};
-
-template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
-struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
-{
-  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
-  {
-    // in case we want to (or have to) skip VML at runtime we can call:
-    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
-    vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
-  }
-};
-
-// Macroses
-
-#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
-  template<typename Derived1, typename Derived2, typename UnaryOp> \
-  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized>  {  \
-    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
-      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
-    } \
-  };
-
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
-
-
-#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
-#define  EIGEN_MKL_VML_MODE VML_HA
-#else
-#define  EIGEN_MKL_VML_MODE VML_LA
-#endif
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
-    enum { IsSupported = 1 };                                                    \
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
-                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
-      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                           \
-    }                                                                            \
-  };
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
-    enum { IsSupported = 1 };                                                    \
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
-                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
-      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
-      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                  \
-    }                                                                            \
-  };
-
-#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)       \
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
-    enum { IsSupported = 1 };                                                    \
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
-                          int size, const EIGENTYPE* src, EIGENTYPE* dst) {      \
-      EIGENTYPE exponent = func.m_exponent;                                      \
-      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
-      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,               \
-                        (VMLTYPE*)dst, &vmlMode);                                \
-    }                                                                            \
-  };
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                   \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)             \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)   \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                        \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                         \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
-
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)         \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)             \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8)  \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                     \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                      \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
-
-
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin,  Sin)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos,  Cos)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan,  Tan)
-//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,  Abs)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp,  Exp)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log,  Ln)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
-
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
-
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
-
-} // end namespace internal
-
-#endif // EIGEN_ASSIGN_VML_H
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -87,7 +87,7 @@ class BandMatrixBase : public EigenBase<Derived>
      if (i<=supers())
      {
        start = supers()-i;
-        len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
+        len = std::min(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
      }
      else if (i>=rows()-subs())
        len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
@@ -96,11 +96,11 @@ class BandMatrixBase : public EigenBase<Derived>

    /** \returns a vector expression of the main diagonal */
    inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
-    { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
+    { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }

    /** \returns a vector expression of the main diagonal (const version) */
    inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
-    { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
+    { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,std::min(rows(),cols())); }

    template<int Index> struct DiagonalIntReturnType {
      enum {
@@ -122,13 +122,13 @@ class BandMatrixBase : public EigenBase<Derived>
    /** \returns a vector expression of the \a N -th sub or super diagonal */
    template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
    {
-      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
+      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
    }

    /** \returns a vector expression of the \a N -th sub or super diagonal */
    template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
    {
-      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
+      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, std::max(0,N), 1, diagonalLength(N));
    }

    /** \returns a vector expression of the \a i -th sub or super diagonal */
@@ -166,7 +166,7 @@ class BandMatrixBase : public EigenBase<Derived>
  protected:

    inline Index diagonalLength(Index i) const
-    { return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
+    { return i<0 ? std::min(cols(),rows()+i) : std::min(rows(),cols()-i); }
 };

 /**
@@ -180,7 +180,7 @@ class BandMatrixBase : public EigenBase<Derived>
  * \param Cols Number of columns, or \b Dynamic
  * \param Supers Number of super diagonal
  * \param Subs Number of sub diagonal
-  * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
+  * \param _Options A combination of either \b RowMajor or \b ColMajor, and of \b SelfAdjoint
  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to
  *                 column-major. The latter controls whether the matrix represents a selfadjoint 
  *                 matrix in which case either Supers of Subs have to be null.
@@ -284,7 +284,6 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT
      : m_coeffs(coeffs),
        m_rows(rows), m_supers(supers), m_subs(subs)
    {
-      EIGEN_UNUSED_VARIABLE(cols);
      //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
    }

--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -94,7 +94,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess>
    MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
                       && (InnerStrideAtCompileTime == 1)
                        ? PacketAccessBit : 0,
-    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
+    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
@@ -242,21 +242,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
    inline Index outerStride() const;
    #endif

-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
-    { 
-      return m_xpr; 
-    }
-      
-    Index startRow() const 
-    { 
-      return m_startRow.value(); 
-    }
-      
-    Index startCol() const 
-    { 
-      return m_startCol.value(); 
-    }
-
  protected:

    const typename XprType::Nested m_xpr;
@@ -319,11 +304,6 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
      init();
    }

-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
-    { 
-      return m_xpr; 
-    }
-      
    /** \sa MapBase::innerStride() */
    inline Index innerStride() const
    {
@@ -361,7 +341,7 @@ class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
                    : m_xpr.innerStride();
    }

-    typename XprType::Nested m_xpr;
+    const typename XprType::Nested m_xpr;
    int m_outerStride;
 };

--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -35,7 +35,7 @@ struct all_unroller
    row = (UnrollCount-1) % Derived::RowsAtCompileTime
  };

-  static inline bool run(const Derived &mat)
+  inline static bool run(const Derived &mat)
  {
    return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
  }
@@ -44,13 +44,13 @@ struct all_unroller
 template<typename Derived>
 struct all_unroller<Derived, 1>
 {
-  static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
+  inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
 };

 template<typename Derived>
 struct all_unroller<Derived, Dynamic>
 {
-  static inline bool run(const Derived &) { return false; }
+  inline static bool run(const Derived &) { return false; }
 };

 template<typename Derived, int UnrollCount>
@@ -61,7 +61,7 @@ struct any_unroller
    row = (UnrollCount-1) % Derived::RowsAtCompileTime
  };

-  static inline bool run(const Derived &mat)
+  inline static bool run(const Derived &mat)
  {
    return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
  }
@@ -70,13 +70,13 @@ struct any_unroller
 template<typename Derived>
 struct any_unroller<Derived, 1>
 {
-  static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
+  inline static bool run(const Derived &mat) { return mat.coeff(0, 0); }
 };

 template<typename Derived>
 struct any_unroller<Derived, Dynamic>
 {
-  static inline bool run(const Derived &) { return false; }
+  inline static bool run(const Derived &) { return false; }
 };

 } // end namespace internal
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -167,8 +167,8 @@ class CwiseBinaryOp : internal::no_assignment_operator,
    const BinaryOp& functor() const { return m_functor; }

  protected:
-    LhsNested m_lhs;
-    RhsNested m_rhs;
+    const LhsNested m_lhs;
+    const RhsNested m_rhs;
    const BinaryOp m_functor;
 };

--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -101,9 +101,6 @@ class CwiseNullaryOp : internal::no_assignment_operator,
      return m_functor.packetOp(index);
    }

-    /** \returns the functor representing the nullary operation */
-    const NullaryOp& functor() const { return m_functor; }
-
  protected:
    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
@@ -745,7 +742,7 @@ struct setIdentity_impl<Derived, true>
  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    m.setZero();
-    const Index size = (std::min)(m.rows(), m.cols());
+    const Index size = std::min(m.rows(), m.cols());
    for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
    return m;
  }
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -95,7 +95,7 @@ class CwiseUnaryOp : internal::no_assignment_operator,
    nestedExpression() { return m_xpr.const_cast_derived(); }

  protected:
-    typename XprType::Nested m_xpr;
+    const typename XprType::Nested m_xpr;
    const UnaryOp m_functor;
 };

--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -97,7 +97,7 @@ class CwiseUnaryView : internal::no_assignment_operator,

  protected:
    // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
-    typename internal::nested<MatrixType>::type m_matrix;
+    const typename internal::nested<MatrixType>::type m_matrix;
    ViewOp m_functor;
 };

--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -376,13 +376,12 @@ template<typename Derived> class DenseBase
    inline Derived& operator*=(const Scalar& other);
    inline Derived& operator/=(const Scalar& other);

-    typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
    /** \returns the matrix or vector obtained by evaluating this expression.
      *
      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
      * a const reference, in order to avoid a useless copy.
      */
-    EIGEN_STRONG_INLINE EvalReturnType eval() const
+    EIGEN_STRONG_INLINE const typename internal::eval<Derived>::type eval() const
    {
      // Even though MSVC does not honor strong inlining when the return type
      // is a dynamic matrix, we desperately need strong inlining for fixed
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -35,7 +35,7 @@ template<typename T> struct add_const_on_value_type_if_arithmetic
 /** \brief Base class providing read-only coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam #ReadOnlyAccessors Constant indicating read-only access
+  * \tparam ReadOnlyAccessors Constant indicating read-only access
  *
  * This class defines the \c operator() \c const function and friends, which can be used to read specific
  * entries of a matrix or array.
@@ -212,7 +212,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit.
      *
-      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
@@ -239,7 +239,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit and the LinearAccessBit.
      *
-      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
@@ -275,7 +275,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
 /** \brief Base class providing read/write coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam #WriteAccessors Constant indicating read/write access
+  * \tparam WriteAccessors Constant indicating read/write access
  *
  * This class defines the non-const \c operator() function and friends, which can be used to write specific
  * entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
@@ -433,7 +433,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * to ensure that a packet really starts there. This method is only available on expressions having the
      * PacketAccessBit.
      *
-      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
      * starting at an address which is a multiple of the packet size.
      */
@@ -567,7 +567,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
 /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam #DirectAccessors Constant indicating direct access
+  * \tparam DirectAccessors Constant indicating direct access
  *
  * This class defines functions to work with strides which can be used to access entries directly. This class
  * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
@@ -637,7 +637,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
 /** \brief Base class providing direct read/write coefficient access to matrices and arrays.
  * \ingroup Core_Module
  * \tparam Derived Type of the derived class
-  * \tparam #DirectWriteAccessors Constant indicating direct access
+  * \tparam DirectAccessors Constant indicating direct access
  *
  * This class defines functions to work with strides which can be used to access entries directly. This class
  * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
@@ -710,16 +710,16 @@ namespace internal {
 template<typename Derived, bool JustReturnZero>
 struct first_aligned_impl
 {
-  static inline typename Derived::Index run(const Derived&)
+  inline static typename Derived::Index run(const Derived&)
  { return 0; }
 };

 template<typename Derived>
 struct first_aligned_impl<Derived, false>
 {
-  static inline typename Derived::Index run(const Derived& m)
+  inline static typename Derived::Index run(const Derived& m)
  {
-    return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
+    return first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
  }
 };

@@ -729,7 +729,7 @@ struct first_aligned_impl<Derived, false>
  * documentation.
  */
 template<typename Derived>
-static inline typename Derived::Index first_aligned(const Derived& m)
+inline static typename Derived::Index first_aligned(const Derived& m)
 {
  return first_aligned_impl
          <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -58,7 +58,7 @@ struct plain_array
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
    eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
              && "this assertion is explained here: " \
-              "http://eigen.tuxfamily.org/dox-devel/TopicUnalignedArrayAssert.html" \
+              "http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #endif

@@ -104,8 +104,8 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
      : m_data(internal::constructor_without_unaligned_array_assert()) {}
    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
-    static inline DenseIndex rows(void) {return _Rows;}
-    static inline DenseIndex cols(void) {return _Cols;}
+    inline static DenseIndex rows(void) {return _Rows;}
+    inline static DenseIndex cols(void) {return _Cols;}
    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
    inline const T *data() const { return m_data.array; }
@@ -120,24 +120,14 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0
    inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
    inline void swap(DenseStorage& ) {}
-    static inline DenseIndex rows(void) {return _Rows;}
-    static inline DenseIndex cols(void) {return _Cols;}
+    inline static DenseIndex rows(void) {return _Rows;}
+    inline static DenseIndex cols(void) {return _Cols;}
    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
    inline const T *data() const { return 0; }
    inline T *data() { return 0; }
 };

-// more specializations for null matrices; these are necessary to resolve ambiguities
-template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
-: public DenseStorage<T, 0, 0, 0, _Options> { };
-
-template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
-: public DenseStorage<T, 0, 0, 0, _Options> { };
-
-template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
-: public DenseStorage<T, 0, 0, 0, _Options> { };
-
 // dynamic-size matrix with fixed-size storage
 template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
 {
@@ -251,7 +241,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
-    static inline DenseIndex rows(void) {return _Rows;}
+    inline static DenseIndex rows(void) {return _Rows;}
    inline DenseIndex cols(void) const {return m_cols;}
    inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
@@ -288,7 +278,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
    inline DenseIndex rows(void) const {return m_rows;}
-    static inline DenseIndex cols(void) {return _Cols;}
+    inline static DenseIndex cols(void) {return _Cols;}
    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -2,7 +2,6 @@
 // for linear algebra.
 //
 // Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -88,7 +87,7 @@ template<typename MatrixType, int DiagIndex> class Diagonal
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

    inline Index rows() const
-    { return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
+    { return m_index.value()<0 ? std::min(m_matrix.cols(),m_matrix.rows()+m_index.value()) : std::min(m_matrix.rows(),m_matrix.cols()-m_index.value()); }

    inline Index cols() const { return 1; }

@@ -102,15 +101,6 @@ template<typename MatrixType, int DiagIndex> class Diagonal
      return 0;
    }

-    typedef typename internal::conditional<
-                       internal::is_lvalue<MatrixType>::value,
-                       Scalar,
-                       const Scalar
-                     >::type ScalarWithConstIfNotLvalue;
-
-    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
-    inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
-
    inline Scalar& coeffRef(Index row, Index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
@@ -143,19 +133,8 @@ template<typename MatrixType, int DiagIndex> class Diagonal
      return m_matrix.coeff(index+rowOffset(), index+colOffset());
    }

-    const typename internal::remove_all<typename MatrixType::Nested>::type& 
-    nestedExpression() const 
-    {
-      return m_matrix;
-    }
-
-    int index() const
-    {
-      return m_index.value();
-    }
-
  protected:
-    typename MatrixType::Nested m_matrix;
+    const typename MatrixType::Nested m_matrix;
    const internal::variable_if_dynamic<Index, DiagIndex> m_index;

  private:
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -72,7 +72,7 @@ class DiagonalBase : public EigenBase<Derived>
    const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
    operator*(const MatrixBase<MatrixDerived> &matrix) const;

-    inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
+    inline const DiagonalWrapper<CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
    inverse() const
    {
      return diagonal().cwiseInverse();
@@ -251,13 +251,13 @@ class DiagonalWrapper
    #endif

    /** Constructor from expression of diagonal coefficients to wrap. */
-    inline DiagonalWrapper(DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}
+    inline DiagonalWrapper(const DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}

    /** \returns a const reference to the wrapped expression of diagonal coefficients. */
    const DiagonalVectorType& diagonal() const { return m_diagonal; }

  protected:
-    typename DiagonalVectorType::Nested m_diagonal;
+    const typename DiagonalVectorType::Nested m_diagonal;
 };

 /** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -107,8 +107,8 @@ class DiagonalProduct : internal::no_assignment_operator,
                     m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
    }

-    typename MatrixType::Nested m_matrix;
-    typename DiagonalType::Nested m_diagonal;
+    const typename MatrixType::Nested m_matrix;
+    const typename DiagonalType::Nested m_diagonal;
 };

 /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -116,9 +116,7 @@ MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const

 //---------- implementation of L2 norm and related functions ----------

-/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
-  * In both cases, it consists in the sum of the square of all the matrix entries.
-  * For vectors, this is also equals to the dot product of \c *this with itself.
+/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
  *
  * \sa dot(), norm()
  */
@@ -128,9 +126,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
  return internal::real((*this).cwiseAbs2().sum());
 }

-/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
-  * In both cases, it consists in the square root of the sum of the square of all the matrix entries.
-  * For vectors, this is also equals to the square root of the dot product of \c *this with itself.
+/** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself.
  *
  * \sa dot(), squaredNorm()
  */
@@ -176,7 +172,7 @@ template<typename Derived, int p>
 struct lpNorm_selector
 {
  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
-  static inline RealScalar run(const MatrixBase<Derived>& m)
+  inline static RealScalar run(const MatrixBase<Derived>& m)
  {
    return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
  }
@@ -185,7 +181,7 @@ struct lpNorm_selector
 template<typename Derived>
 struct lpNorm_selector<Derived, 1>
 {
-  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.cwiseAbs().sum();
  }
@@ -194,7 +190,7 @@ struct lpNorm_selector<Derived, 1>
 template<typename Derived>
 struct lpNorm_selector<Derived, 2>
 {
-  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.norm();
  }
@@ -203,7 +199,7 @@ struct lpNorm_selector<Derived, 2>
 template<typename Derived>
 struct lpNorm_selector<Derived, Infinity>
 {
-  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  inline static typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.cwiseAbs().maxCoeff();
  }
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -116,7 +116,7 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
  */
 template<typename Scalar> struct scalar_min_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pmin(a,b); }
@@ -139,7 +139,7 @@ struct functor_traits<scalar_min_op<Scalar> > {
  */
 template<typename Scalar> struct scalar_max_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
  template<typename Packet>
  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
  { return internal::pmax(a,b); }
@@ -165,10 +165,8 @@ template<typename Scalar> struct scalar_hypot_op {
 //   typedef typename NumTraits<Scalar>::Real result_type;
  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
  {
-    using std::max;
-    using std::min;
-    Scalar p = (max)(_x, _y);
-    Scalar q = (min)(_x, _y);
+    Scalar p = std::max(_x, _y);
+    Scalar q = std::min(_x, _y);
    Scalar qp = q/p;
    return p * sqrt(Scalar(1) + qp*qp);
  }
@@ -220,38 +218,6 @@ struct functor_traits<scalar_quotient_op<Scalar> > {
  };
 };

-/** \internal
-  * \brief Template functor to compute the and of two booleans
-  *
-  * \sa class CwiseBinaryOp, ArrayBase::operator&&
-  */
-struct scalar_boolean_and_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
-  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
-};
-template<> struct functor_traits<scalar_boolean_and_op> {
-  enum {
-    Cost = NumTraits<bool>::AddCost,
-    PacketAccess = false
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the or of two booleans
-  *
-  * \sa class CwiseBinaryOp, ArrayBase::operator||
-  */
-struct scalar_boolean_or_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
-  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
-};
-template<> struct functor_traits<scalar_boolean_or_op> {
-  enum {
-    Cost = NumTraits<bool>::AddCost,
-    PacketAccess = false
-  };
-};
-
 // unary functors:

 /** \internal
@@ -639,7 +605,7 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
  EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
  {
    eigen_assert(col==0 || row==0);
-    return impl.packetOp(col + row);
+    return impl(col + row);
  }

  // This proxy object handles the actual required temporaries, the different
@@ -784,9 +750,9 @@ struct functor_traits<scalar_acos_op<Scalar> >
  */
 template<typename Scalar> struct scalar_asin_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
-  inline const Scalar operator() (const Scalar& a) const { return asin(a); }
+  inline const Scalar operator() (const Scalar& a) const { return acos(a); }
  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+  inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_asin_op<Scalar> >
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -34,10 +34,9 @@ struct isApprox_selector
 {
  static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
  {
-    using std::min;
-    typename internal::nested<Derived,2>::type nested(x);
-    typename internal::nested<OtherDerived,2>::type otherNested(y);
-    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
+    const typename internal::nested<Derived,2>::type nested(x);
+    const typename internal::nested<OtherDerived,2>::type otherNested(y);
+    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * std::min(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
  }
 };

--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -1,624 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
-
-#ifndef EIGEN_GENERAL_PRODUCT_H
-#define EIGEN_GENERAL_PRODUCT_H
-
-/** \class GeneralProduct
-  * \ingroup Core_Module
-  *
-  * \brief Expression of the product of two general matrices or vectors
-  *
-  * \param LhsNested the type used to store the left-hand side
-  * \param RhsNested the type used to store the right-hand side
-  * \param ProductMode the type of the product
-  *
-  * This class represents an expression of the product of two general matrices.
-  * We call a general matrix, a dense matrix with full storage. For instance,
-  * This excludes triangular, selfadjoint, and sparse matrices.
-  * It is the return type of the operator* between general matrices. Its template
-  * arguments are determined automatically by ProductReturnType. Therefore,
-  * GeneralProduct should never be used direclty. To determine the result type of a
-  * function which involves a matrix product, use ProductReturnType::Type.
-  *
-  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
-  */
-template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
-class GeneralProduct;
-
-enum {
-  Large = 2,
-  Small = 3
-};
-
-namespace internal {
-
-template<int Rows, int Cols, int Depth> struct product_type_selector;
-
-template<int Size, int MaxSize> struct product_size_category
-{
-  enum { is_large = MaxSize == Dynamic ||
-                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
-         value = is_large  ? Large
-               : Size == 1 ? 1
-                           : Small
-  };
-};
-
-template<typename Lhs, typename Rhs> struct product_type
-{
-  typedef typename remove_all<Lhs>::type _Lhs;
-  typedef typename remove_all<Rhs>::type _Rhs;
-  enum {
-    MaxRows  = _Lhs::MaxRowsAtCompileTime,
-    Rows  = _Lhs::RowsAtCompileTime,
-    MaxCols  = _Rhs::MaxColsAtCompileTime,
-    Cols  = _Rhs::ColsAtCompileTime,
-    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
-                                           _Rhs::MaxRowsAtCompileTime),
-    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
-                                        _Rhs::RowsAtCompileTime),
-    LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
-  };
-
-  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
-  // is to work around an internal compiler error with gcc 4.1 and 4.2.
-private:
-  enum {
-    rows_select = product_size_category<Rows,MaxRows>::value,
-    cols_select = product_size_category<Cols,MaxCols>::value,
-    depth_select = product_size_category<Depth,MaxDepth>::value
-  };
-  typedef product_type_selector<rows_select, cols_select, depth_select> selector;
-
-public:
-  enum {
-    value = selector::ret
-  };
-#ifdef EIGEN_DEBUG_PRODUCT
-  static void debug()
-  {
-      EIGEN_DEBUG_VAR(Rows);
-      EIGEN_DEBUG_VAR(Cols);
-      EIGEN_DEBUG_VAR(Depth);
-      EIGEN_DEBUG_VAR(rows_select);
-      EIGEN_DEBUG_VAR(cols_select);
-      EIGEN_DEBUG_VAR(depth_select);
-      EIGEN_DEBUG_VAR(value);
-  }
-#endif
-};
-
-
-/* The following allows to select the kind of product at compile time
- * based on the three dimensions of the product.
- * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
-// FIXME I'm not sure the current mapping is the ideal one.
-template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
-template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
-template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
-template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
-template<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
-template<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
-template<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
-template<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
-template<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };
-
-} // end namespace internal
-
-/** \class ProductReturnType
-  * \ingroup Core_Module
-  *
-  * \brief Helper class to get the correct and optimized returned type of operator*
-  *
-  * \param Lhs the type of the left-hand side
-  * \param Rhs the type of the right-hand side
-  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
-  *
-  * This class defines the typename Type representing the optimized product expression
-  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
-  * is the recommended way to define the result type of a function returning an expression
-  * which involve a matrix product. The class Product should never be
-  * used directly.
-  *
-  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
-  */
-template<typename Lhs, typename Rhs, int ProductType>
-struct ProductReturnType
-{
-  // TODO use the nested type to reduce instanciations ????
-//   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
-//   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
-
-  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
-};
-
-// this is a workaround for sun CC
-template<typename Lhs, typename Rhs>
-struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{};
-
-/***********************************************************************
-*  Implementation of Inner Vector Vector Product
-***********************************************************************/
-
-// FIXME : maybe the "inner product" could return a Scalar
-// instead of a 1x1 matrix ??
-// Pro: more natural for the user
-// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
-// product ends up to a row-vector times col-vector product... To tackle this use
-// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
-
-namespace internal {
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
- : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, InnerProduct>
-  : internal::no_assignment_operator,
-    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
-{
-    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
-  public:
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
-      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
-    }
-
-    /** Convertion to scalar */
-    operator const typename Base::Scalar() const {
-      return Base::coeff(0,0);
-    }
-};
-
-/***********************************************************************
-*  Implementation of Outer Vector Vector Product
-***********************************************************************/
-
-namespace internal {
-template<int StorageOrder> struct outer_product_selector;
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, OuterProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
-{
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-
-    template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
-    {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
-    }
-};
-
-namespace internal {
-
-template<> struct outer_product_selector<ColMajor> {
-  template<typename ProductType, typename Dest>
-  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
-    typedef typename Dest::Index Index;
-    // FIXME make sure lhs is sequentially stored
-    // FIXME not very good if rhs is real and lhs complex while alpha is real too
-    const Index cols = dest.cols();
-    for (Index j=0; j<cols; ++j)
-      dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
-  }
-};
-
-template<> struct outer_product_selector<RowMajor> {
-  template<typename ProductType, typename Dest>
-  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
-    typedef typename Dest::Index Index;
-    // FIXME make sure rhs is sequentially stored
-    // FIXME not very good if lhs is real and rhs complex while alpha is real too
-    const Index rows = dest.rows();
-    for (Index i=0; i<rows; ++i)
-      dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
-  }
-};
-
-} // end namespace internal
-
-/***********************************************************************
-*  Implementation of General Matrix Vector Product
-***********************************************************************/
-
-/*  According to the shape/flags of the matrix we have to distinghish 3 different cases:
- *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
- *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
- *   3 - all other cases are handled using a simple loop along the outer-storage direction.
- *  Therefore we need a lower level meta selector.
- *  Furthermore, if the matrix is the rhs, then the product has to be transposed.
- */
-namespace internal {
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
-{};
-
-template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_selector;
-
-} // end namespace internal
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, GemvProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
-{
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    typedef typename Lhs::Scalar LhsScalar;
-    typedef typename Rhs::Scalar RhsScalar;
-
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {
-//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
-//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-
-    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
-    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
-
-    template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
-    {
-      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
-      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
-                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
-    }
-};
-
-namespace internal {
-
-// The vector is on the left => transposition
-template<int StorageOrder, bool BlasCompatible>
-struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    Transpose<Dest> destT(dest);
-    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
-      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
-        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
-  }
-};
-
-template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
-
-template<typename Scalar,int Size,int MaxSize>
-struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
-{
-  EIGEN_STRONG_INLINE  Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
-};
-
-template<typename Scalar,int Size>
-struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
-{
-  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
-};
-
-template<typename Scalar,int Size,int MaxSize>
-struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
-{
-  #if EIGEN_ALIGN_STATICALLY
-  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
-  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
-  #else
-  // Some architectures cannot align on the stack,
-  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
-  enum {
-    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
-    PacketSize      = internal::packet_traits<Scalar>::size
-  };
-  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
-  EIGEN_STRONG_INLINE Scalar* data() {
-    return ForceAlignment
-            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
-            : m_data.array;
-  }
-  #endif
-};
-
-template<> struct gemv_selector<OnTheRight,ColMajor,true>
-{
-  template<typename ProductType, typename Dest>
-  static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::LhsScalar   LhsScalar;
-    typedef typename ProductType::RhsScalar   RhsScalar;
-    typedef typename ProductType::Scalar      ResScalar;
-    typedef typename ProductType::RealScalar  RealScalar;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
-
-    ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
-    ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
-
-    enum {
-      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
-      // on, the other hand it is good for the cache to pack the vector anyways...
-      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
-      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
-      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
-    };
-
-    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
-
-    bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
-    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
-    
-    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
-
-    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
-                                                  evalToDest ? dest.data() : static_dest.data());
-    
-    if(!evalToDest)
-    {
-      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      int size = dest.size();
-      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      #endif
-      if(!alphaIsCompatible)
-      {
-        MappedDest(actualDestPtr, dest.size()).setZero();
-        compatibleAlpha = RhsScalar(1);
-      }
-      else
-        MappedDest(actualDestPtr, dest.size()) = dest;
-    }
-
-    general_matrix_vector_product
-      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
-        actualLhs.rows(), actualLhs.cols(),
-        actualLhs.data(), actualLhs.outerStride(),
-        actualRhs.data(), actualRhs.innerStride(),
-        actualDestPtr, 1,
-        compatibleAlpha);
-
-    if (!evalToDest)
-    {
-      if(!alphaIsCompatible)
-        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
-      else
-        dest = MappedDest(actualDestPtr, dest.size());
-    }
-  }
-};
-
-template<> struct gemv_selector<OnTheRight,RowMajor,true>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename ProductType::LhsScalar LhsScalar;
-    typedef typename ProductType::RhsScalar RhsScalar;
-    typedef typename ProductType::Scalar    ResScalar;
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::_ActualRhsType _ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-
-    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
-
-    enum {
-      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
-      // on, the other hand it is good for the cache to pack the vector anyways...
-      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
-    };
-
-    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
-
-    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
-        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
-
-    if(!DirectlyUseRhs)
-    {
-      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      int size = actualRhs.size();
-      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      #endif
-      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
-    }
-
-    general_matrix_vector_product
-      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
-        actualLhs.rows(), actualLhs.cols(),
-        actualLhs.data(), actualLhs.outerStride(),
-        actualRhsPtr, 1,
-        dest.data(), dest.innerStride(),
-        actualAlpha);
-  }
-};
-
-template<> struct gemv_selector<OnTheRight,ColMajor,false>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename Dest::Index Index;
-    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
-    const Index size = prod.rhs().rows();
-    for(Index k=0; k<size; ++k)
-      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
-  }
-};
-
-template<> struct gemv_selector<OnTheRight,RowMajor,false>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
-  {
-    typedef typename Dest::Index Index;
-    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
-    const Index rows = prod.rows();
-    for(Index i=0; i<rows; ++i)
-      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
-  }
-};
-
-} // end namespace internal
-
-/***************************************************************************
-* Implementation of matrix base methods
-***************************************************************************/
-
-/** \returns the matrix product of \c *this and \a other.
-  *
-  * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
-  *
-  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline const typename ProductReturnType<Derived, OtherDerived>::Type
-MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
-{
-  // A note regarding the function declaration: In MSVC, this function will sometimes
-  // not be inlined since DenseStorage is an unwindable object for dynamic
-  // matrices and product types are holding a member to store the result.
-  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
-  enum {
-    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
-                   || OtherDerived::RowsAtCompileTime==Dynamic
-                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
-    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
-    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
-  };
-  // note to the lost user:
-  //    * for a dot product use: v1.dot(v2)
-  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
-    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
-    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
-  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-#ifdef EIGEN_DEBUG_PRODUCT
-  internal::product_type<Derived,OtherDerived>::debug();
-#endif
-  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
-}
-
-/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
-  *
-  * The returned product will behave like any other expressions: the coefficients of the product will be
-  * computed once at a time as requested. This might be useful in some extremely rare cases when only
-  * a small and no coherent fraction of the result's coefficients have to be computed.
-  *
-  * \warning This version of the matrix product can be much much slower. So use it only if you know
-  * what you are doing and that you measured a true speed improvement.
-  *
-  * \sa operator*(const MatrixBase&)
-  */
-template<typename Derived>
-template<typename OtherDerived>
-const typename LazyProductReturnType<Derived,OtherDerived>::Type
-MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
-{
-  enum {
-    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
-                   || OtherDerived::RowsAtCompileTime==Dynamic
-                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
-    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
-    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
-  };
-  // note to the lost user:
-  //    * for a dot product use: v1.dot(v2)
-  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
-    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
-    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
-  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-
-  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
-}
-
-#endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -134,12 +134,12 @@ pdiv(const Packet& a,
 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
 template<typename Packet> inline Packet
 pmin(const Packet& a,
-        const Packet& b) { using std::min; return (min)(a, b); }
+        const Packet& b) { return std::min(a, b); }

 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
 template<typename Packet> inline Packet
 pmax(const Packet& a,
-        const Packet& b) { using std::max; return (max)(a, b); }
+        const Packet& b) { return std::max(a, b); }

 /** \internal \returns the absolute value of \a a */
 template<typename Packet> inline Packet
@@ -286,7 +286,7 @@ pmadd(const Packet&  a,
 { return padd(pmul(a, b),c); }

 /** \internal \returns a packet version of \a *from.
-  * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
+  * \If LoadMode equals Aligned, \a from must be 16 bytes aligned */
 template<typename Packet, int LoadMode>
 inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
 {
@@ -297,7 +297,7 @@ inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
 }

 /** \internal copy the packet \a from to \a *to.
-  * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
+  * If StoreMode equals Aligned, \a to must be 16 bytes aligned */
 template<typename Scalar, typename Packet, int LoadMode>
 inline void pstoret(Scalar* to, const Packet& from)
 {
@@ -312,7 +312,7 @@ template<int Offset,typename PacketType>
 struct palign_impl
 {
  // by default data are aligned, so there is nothing to be done :)
-  static inline void run(PacketType&, const PacketType&) {}
+  inline static void run(PacketType&, const PacketType&) {}
 };

 /** \internal update \a first using the concatenation of the \a Offset last elements
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -141,8 +141,7 @@ struct significant_decimals_default_impl
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline int run()
  {
-    using std::ceil;
-    return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
+    return cast<RealScalar,int>(std::ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
  }
 };

@@ -171,7 +170,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
    return s;
  }
  
-  typename Derived::Nested m = _m;
+  const typename Derived::Nested m = _m;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Index Index;

--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -31,10 +31,10 @@
  *
  * \brief A matrix or vector expression mapping an existing array of data.
  *
-  * \tparam PlainObjectType the equivalent matrix type of the mapped data
-  * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
-  *                The default is \c #Unaligned.
-  * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
+  * \param PlainObjectType the equivalent matrix type of the mapped data
+  * \param MapOptions specifies whether the pointer is \c Aligned, or \c Unaligned.
+  *                The default is \c Unaligned.
+  * \param StrideType optionnally specifies strides. By default, Map assumes the memory layout
  *                   of an ordinary, contiguous array. This can be overridden by specifying strides.
  *                   The type passed here must be a specialization of the Stride template, see examples below.
  *
@@ -72,9 +72,9 @@
  * Example: \include Map_placement_new.cpp
  * Output: \verbinclude Map_placement_new.out
  *
-  * This class is the return type of PlainObjectBase::Map() but can also be used directly.
+  * This class is the return type of Matrix::Map() but can also be used directly.
  *
-  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+  * \sa Matrix::Map(), \ref TopicStorageOrders
  */

 namespace internal {
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -170,8 +170,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
                                        internal::inner_stride_at_compile_time<Derived>::ret==1),
                          PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
-      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
-                   && "data is not aligned");
+      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % (sizeof(Scalar)*internal::packet_traits<Scalar>::size)) == 0)
+        && "data is not aligned");
    }

    PointerType m_data;
@@ -238,7 +238,7 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
                (this->m_data + index * innerStride(), x);
    }

-    explicit inline MapBase(PointerType data) : Base(data) {}
+    inline MapBase(PointerType data) : Base(data) {}
    inline MapBase(PointerType data, Index size) : Base(data, size) {}
    inline MapBase(PointerType data, Index rows, Index cols) : Base(data, rows, cols) {}

--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -87,8 +87,7 @@ struct real_impl<std::complex<RealScalar> >
 {
  static inline RealScalar run(const std::complex<RealScalar>& x)
  {
-    using std::real;
-    return real(x);
+    return std::real(x);
  }
 };

@@ -123,8 +122,7 @@ struct imag_impl<std::complex<RealScalar> >
 {
  static inline RealScalar run(const std::complex<RealScalar>& x)
  {
-    using std::imag;
-    return imag(x);
+    return std::imag(x);
  }
 };

@@ -246,8 +244,7 @@ struct conj_impl<std::complex<RealScalar> >
 {
  static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x)
  {
-    using std::conj;
-    return conj(x);
+    return std::conj(x);
  }
 };

@@ -273,8 +270,7 @@ struct abs_impl
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x)
  {
-    using std::abs;
-    return abs(x);
+    return std::abs(x);
  }
 };

@@ -309,7 +305,7 @@ struct abs2_impl<std::complex<RealScalar> >
 {
  static inline RealScalar run(const std::complex<RealScalar>& x)
  {
-    return real(x)*real(x) + imag(x)*imag(x);
+    return std::norm(x);
  }
 };

@@ -373,12 +369,10 @@ struct hypot_impl
  typedef typename NumTraits<Scalar>::Real RealScalar;
  static inline RealScalar run(const Scalar& x, const Scalar& y)
  {
-    using std::max;
-    using std::min;
    RealScalar _x = abs(x);
    RealScalar _y = abs(y);
-    RealScalar p = (max)(_x, _y);
-    RealScalar q = (min)(_x, _y);
+    RealScalar p = std::max(_x, _y);
+    RealScalar q = std::min(_x, _y);
    RealScalar qp = q/p;
    return p * sqrt(RealScalar(1) + qp*qp);
  }
@@ -426,8 +420,7 @@ struct sqrt_default_impl
 {
  static inline Scalar run(const Scalar& x)
  {
-    using std::sqrt;
-    return sqrt(x);
+    return std::sqrt(x);
  }
 };

@@ -467,7 +460,7 @@ inline EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)
 // This macro instanciate all the necessary template mechanism which is common to all unary real functions.
 #define EIGEN_MATHFUNC_STANDARD_REAL_UNARY(NAME) \
  template<typename Scalar, bool IsInteger> struct NAME##_default_impl {            \
-    static inline Scalar run(const Scalar& x) { using std::NAME; return NAME(x); }  \
+    static inline Scalar run(const Scalar& x) { return std::NAME(x); }              \
  };                                                                                \
  template<typename Scalar> struct NAME##_default_impl<Scalar, true> {              \
    static inline Scalar run(const Scalar&) {                                       \
@@ -502,8 +495,7 @@ struct atan2_default_impl
  typedef Scalar retval;
  static inline Scalar run(const Scalar& x, const Scalar& y)
  {
-    using std::atan2;
-    return atan2(x, y);
+    return std::atan2(x, y);
  }
 };

@@ -542,8 +534,7 @@ struct pow_default_impl
  typedef Scalar retval;
  static inline Scalar run(const Scalar& x, const Scalar& y)
  {
-    using std::pow;
-    return pow(x, y);
+    return std::pow(x, y);
  }
 };

@@ -552,7 +543,7 @@ struct pow_default_impl<Scalar, true>
 {
  static inline Scalar run(Scalar x, Scalar y)
  {
-    Scalar res(1);
+    Scalar res = 1;
    eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
    if(y & 1) res *= x;
    y >>= 1;
@@ -735,8 +726,7 @@ struct scalar_fuzzy_default_impl<Scalar, false, false>
  }
  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
-    using std::min;
-    return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
+    return abs(x - y) <= std::min(abs(x), abs(y)) * prec;
  }
  static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
@@ -774,8 +764,7 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
  }
  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
  {
-    using std::min;
-    return abs2(x - y) <= (min)(abs2(x), abs2(y)) * prec * prec;
+    return abs2(x - y) <= std::min(abs2(x), abs2(y)) * prec * prec;
  }
 };

@@ -837,17 +826,6 @@ template<> struct scalar_fuzzy_impl<bool>
  
 };

-/****************************************************************************
-* Special functions                                                          *
-****************************************************************************/
-
-// std::isfinite is non standard, so let's define our own version,
-// even though it is not very efficient.
-template<typename T> bool isfinite(const T& x)
-{
-  return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
-}
-
 } // end namespace internal

 #endif // EIGEN_MATHFUNCTIONS_H
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -43,8 +43,8 @@
  * \tparam _Cols Number of columns, or \b Dynamic
  *
  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
-  * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
-  *                 \b #AutoAlign or \b #DontAlign.
+  * \tparam _Options \anchor matrix_tparam_options A combination of either \b RowMajor or \b ColMajor, and of either
+  *                 \b AutoAlign or \b DontAlign.
  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
  *                 for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
  * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
@@ -153,6 +153,10 @@ class Matrix

    typedef typename Base::PlainObject PlainObject;

+    enum { NeedsToAlign = (!(Options&DontAlign))
+                          && SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
+    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+
    using Base::base;
    using Base::coeffRef;

@@ -411,6 +415,25 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)

 #undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
 #undef EIGEN_MAKE_TYPEDEFS
-#undef EIGEN_MAKE_FIXED_TYPEDEFS
+
+#undef EIGEN_MAKE_TYPEDEFS_LARGE
+
+#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \
+using Eigen::Vector##SizeSuffix##TypeSuffix; \
+using Eigen::RowVector##SizeSuffix##TypeSuffix;
+
+#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
+
+#define EIGEN_USING_MATRIX_TYPEDEFS \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)

 #endif // EIGEN_MATRIX_H
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -111,7 +111,7 @@ template<typename Derived> class MatrixBase

    /** \returns the size of the main diagonal, which is min(rows(),cols()).
      * \sa rows(), cols(), SizeAtCompileTime. */
-    inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
+    inline Index diagonalSize() const { return std::min(rows(),cols()); }

    /** \brief The plain matrix type corresponding to this expression.
      *
@@ -330,7 +330,7 @@ template<typename Derived> class MatrixBase
    /** \returns an \link ArrayBase Array \endlink expression of this matrix
      * \sa ArrayBase::matrix() */
    ArrayWrapper<Derived> array() { return derived(); }
-    const ArrayWrapper<const Derived> array() const { return derived(); }
+    const ArrayWrapper<Derived> array() const { return derived(); }

 /////////// LU module ///////////

@@ -465,8 +465,6 @@ template<typename Derived> class MatrixBase
    const MatrixFunctionReturnValue<Derived> sinh() const;
    const MatrixFunctionReturnValue<Derived> cos() const;
    const MatrixFunctionReturnValue<Derived> sin() const;
-    const MatrixSquareRootReturnValue<Derived> sqrt() const;
-    const MatrixLogarithmReturnValue<Derived> log() const;

 #ifdef EIGEN2_SUPPORT
    template<typename ProductDerived, typename Lhs, typename Rhs>
@@ -513,10 +511,10 @@ template<typename Derived> class MatrixBase
  protected:
    // mixing arrays and matrices is not legal
    template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
-    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+    {EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
    // mixing arrays and matrices is not legal
    template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
-    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+    {EIGEN_STATIC_ASSERT(sizeof(typename OtherDerived::Scalar)==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES);}
 };

 #endif // EIGEN_MATRIXBASE_H
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -81,14 +81,14 @@ template<typename T> struct GenericNumTraits
                   >::type NonInteger;
  typedef T Nested;

-  static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
-  static inline Real dummy_precision()
+  inline static Real epsilon() { return std::numeric_limits<T>::epsilon(); }
+  inline static Real dummy_precision()
  {
    // make sure to override this for floating-point types
    return Real(0);
  }
-  static inline T highest() { return (std::numeric_limits<T>::max)(); }
-  static inline T lowest()  { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
+  inline static T highest() { return std::numeric_limits<T>::max(); }
+  inline static T lowest()  { return IsInteger ? std::numeric_limits<T>::min() : (-std::numeric_limits<T>::max()); }
  
 #ifdef EIGEN2_SUPPORT
  enum {
@@ -104,12 +104,12 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
 template<> struct NumTraits<float>
  : GenericNumTraits<float>
 {
-  static inline float dummy_precision() { return 1e-5f; }
+  inline static float dummy_precision() { return 1e-5f; }
 };

 template<> struct NumTraits<double> : GenericNumTraits<double>
 {
-  static inline double dummy_precision() { return 1e-12; }
+  inline static double dummy_precision() { return 1e-12; }
 };

 template<> struct NumTraits<long double>
@@ -130,8 +130,8 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
    MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
  };

-  static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
-  static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
+  inline static Real epsilon() { return NumTraits<Real>::epsilon(); }
+  inline static Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
 };

 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -511,7 +511,7 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp

  protected:

-    typename IndicesType::Nested m_indices;
+    const typename IndicesType::Nested m_indices;
 };

 /** \returns the matrix with the permutation applied to the columns.
@@ -608,7 +608,7 @@ struct permut_matrix_product_retval

  protected:
    const PermutationType& m_permutation;
-    typename MatrixType::Nested m_matrix;
+    const typename MatrixType::Nested m_matrix;
 };

 /* Template partial specialization for transposed/inverse permutations */
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -34,26 +34,13 @@

 namespace internal {

-template<typename Index>
-EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
-{
-  // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
-  // we assume Index is signed
-  Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
-  bool error = (rows < 0  || cols < 0)  ? true
-             : (rows == 0 || cols == 0) ? false
-                                        : (rows > max_index / cols);
-  if (error)
-    throw_std_bad_alloc();
-}
-
 template <typename Derived, typename OtherDerived = Derived, bool IsVector = static_cast<bool>(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;

 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;

 } // end namespace internal

-/** \class PlainObjectBase
+/**
  * \brief %Dense storage base class for matrices and arrays.
  *
  * This class can be extended with the help of the plugin mechanism described on the page
@@ -61,29 +48,8 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
  *
  * \sa \ref TopicClassHierarchy
  */
-#ifdef EIGEN_PARSED_BY_DOXYGEN
-namespace internal {
-
-// this is a warkaround to doxygen not being able to understand the inheritence logic
-// when it is hidden by the dense_xpr_base helper struct.
-template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
-/** This class is just a workaround for Doxygen and it does not not actually exist. */
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct dense_xpr_base_dispatcher_for_doxygen<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-    : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
-/** This class is just a workaround for Doxygen and it does not not actually exist. */
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct dense_xpr_base_dispatcher_for_doxygen<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-    : public ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > {};
-
-} // namespace internal
-
-template<typename Derived>
-class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen<Derived>
-#else
 template<typename Derived>
 class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
-#endif
 {
  public:
    enum { Options = internal::traits<Derived>::Options };
@@ -118,12 +84,14 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
+    

  protected:
    DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;

  public:
-    enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
+    enum { NeedsToAlign = (!(Options&DontAlign))
+                          && SizeAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*SizeAtCompileTime)%16)==0 };
    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

    Base& base() { return *static_cast<Base*>(this); }
@@ -232,13 +200,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
    {
      #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
-        internal::check_rows_cols_for_overflow(rows, cols);
        Index size = rows*cols;
        bool size_changed = size != this->size();
        m_storage.resize(size, rows, cols);
        if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
      #else
-        internal::check_rows_cols_for_overflow(rows, cols);
        m_storage.resize(rows*cols, rows, cols);
      #endif
    }
@@ -307,7 +273,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
    {
      const OtherDerived& other = _other.derived();
-      internal::check_rows_cols_for_overflow(other.rows(), other.cols());
      const Index othersize = other.rows()*other.cols();
      if(RowsAtCompileTime == 1)
      {
@@ -452,7 +417,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
    {
      _check_template_params();
-      internal::check_rows_cols_for_overflow(other.derived().rows(), other.derived().cols());
      Base::operator=(other.derived());
    }

@@ -461,71 +425,74 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
      * \a data pointers.
      *
+      * These methods do not allow to specify strides. If you need to specify strides, you have to
+      * use the Map class directly.
+      *
      * \see class Map
      */
    //@{
-    static inline ConstMapType Map(const Scalar* data)
+    inline static ConstMapType Map(const Scalar* data)
    { return ConstMapType(data); }
-    static inline MapType Map(Scalar* data)
+    inline static MapType Map(Scalar* data)
    { return MapType(data); }
-    static inline ConstMapType Map(const Scalar* data, Index size)
+    inline static ConstMapType Map(const Scalar* data, Index size)
    { return ConstMapType(data, size); }
-    static inline MapType Map(Scalar* data, Index size)
+    inline static MapType Map(Scalar* data, Index size)
    { return MapType(data, size); }
-    static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
+    inline static ConstMapType Map(const Scalar* data, Index rows, Index cols)
    { return ConstMapType(data, rows, cols); }
-    static inline MapType Map(Scalar* data, Index rows, Index cols)
+    inline static MapType Map(Scalar* data, Index rows, Index cols)
    { return MapType(data, rows, cols); }

-    static inline ConstAlignedMapType MapAligned(const Scalar* data)
+    inline static ConstAlignedMapType MapAligned(const Scalar* data)
    { return ConstAlignedMapType(data); }
-    static inline AlignedMapType MapAligned(Scalar* data)
+    inline static AlignedMapType MapAligned(Scalar* data)
    { return AlignedMapType(data); }
-    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
+    inline static ConstAlignedMapType MapAligned(const Scalar* data, Index size)
    { return ConstAlignedMapType(data, size); }
-    static inline AlignedMapType MapAligned(Scalar* data, Index size)
+    inline static AlignedMapType MapAligned(Scalar* data, Index size)
    { return AlignedMapType(data, size); }
-    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
+    inline static ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
    { return ConstAlignedMapType(data, rows, cols); }
-    static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
+    inline static AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
    { return AlignedMapType(data, rows, cols); }

    template<int Outer, int Inner>
-    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
+    inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
+    inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    inline static typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    inline static typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }

    template<int Outer, int Inner>
-    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
+    inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
+    inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    inline static typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
    template<int Outer, int Inner>
-    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    inline static typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
    //@}

@@ -615,12 +582,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    template<typename T0, typename T1>
    EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
    {
-      EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
-                          bool(NumTraits<T1>::IsInteger),
-                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
      eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
             && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
-      internal::check_rows_cols_for_overflow(rows, cols);      
      m_storage.resize(rows*cols,rows,cols);
      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }
@@ -647,7 +610,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type

  public:
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-    static EIGEN_STRONG_INLINE void _check_template_params()
+    EIGEN_STRONG_INLINE static void _check_template_params()
    {
      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
                        && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
@@ -678,15 +641,14 @@ struct internal::conservative_resize_like_impl
    if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
         (!Derived::IsRowMajor && _this.rows() == rows) )  // column-major and we change only the number of columns
    {
-      internal::check_rows_cols_for_overflow(rows, cols);
      _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
    }
    else
    {
      // The storage order does not allow us to use reallocation.
      typename Derived::PlainObject tmp(rows,cols);
-      const Index common_rows = (std::min)(rows, _this.rows());
-      const Index common_cols = (std::min)(cols, _this.cols());
+      const Index common_rows = std::min(rows, _this.rows());
+      const Index common_cols = std::min(cols, _this.cols());
      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
      _this.derived().swap(tmp);
    }
@@ -719,8 +681,8 @@ struct internal::conservative_resize_like_impl
    {
      // The storage order does not allow us to use reallocation.
      typename Derived::PlainObject tmp(other);
-      const Index common_rows = (std::min)(tmp.rows(), _this.rows());
-      const Index common_cols = (std::min)(tmp.cols(), _this.cols());
+      const Index common_rows = std::min(tmp.rows(), _this.rows());
+      const Index common_cols = std::min(tmp.cols(), _this.cols());
      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
      _this.derived().swap(tmp);
    }
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -1,7 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -25,89 +26,603 @@
 #ifndef EIGEN_PRODUCT_H
 #define EIGEN_PRODUCT_H

-template<typename Lhs, typename Rhs> class Product;
-template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
-
-/** \class Product
+/** \class GeneralProduct
  * \ingroup Core_Module
  *
-  * \brief Expression of the product of two arbitrary matrices or vectors
+  * \brief Expression of the product of two general matrices or vectors
  *
-  * \param Lhs the type of the left-hand side expression
-  * \param Rhs the type of the right-hand side expression
+  * \param LhsNested the type used to store the left-hand side
+  * \param RhsNested the type used to store the right-hand side
+  * \param ProductMode the type of the product
  *
-  * This class represents an expression of the product of two arbitrary matrices.
+  * This class represents an expression of the product of two general matrices.
+  * We call a general matrix, a dense matrix with full storage. For instance,
+  * This excludes triangular, selfadjoint, and sparse matrices.
+  * It is the return type of the operator* between general matrices. Its template
+  * arguments are determined automatically by ProductReturnType. Therefore,
+  * GeneralProduct should never be used direclty. To determine the result type of a
+  * function which involves a matrix product, use ProductReturnType::Type.
  *
+  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
  */
+template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
+class GeneralProduct;
+
+enum {
+  Large = 2,
+  Small = 3
+};

 namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<Product<Lhs, Rhs> >
+
+template<int Rows, int Cols, int Depth> struct product_type_selector;
+
+template<int Size, int MaxSize> struct product_size_category
 {
-  typedef MatrixXpr XprKind;
-  typedef typename remove_all<Lhs>::type LhsCleaned;
-  typedef typename remove_all<Rhs>::type RhsCleaned;
-  typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
-  typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
-                                        typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
-                                      typename traits<RhsCleaned>::Index>::type Index;
-  enum {
-    RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
-    ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
-    MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
-    Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
-    CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
+  enum { is_large = MaxSize == Dynamic ||
+                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
+         value = is_large  ? Large
+               : Size == 1 ? 1
+                           : Small
  };
 };
+
+template<typename Lhs, typename Rhs> struct product_type
+{
+  typedef typename remove_all<Lhs>::type _Lhs;
+  typedef typename remove_all<Rhs>::type _Rhs;
+  enum {
+    MaxRows  = _Lhs::MaxRowsAtCompileTime,
+    Rows  = _Lhs::RowsAtCompileTime,
+    MaxCols  = _Rhs::MaxColsAtCompileTime,
+    Cols  = _Rhs::ColsAtCompileTime,
+    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
+                                           _Rhs::MaxRowsAtCompileTime),
+    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
+                                        _Rhs::RowsAtCompileTime),
+    LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+  };
+
+  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
+  // is to work around an internal compiler error with gcc 4.1 and 4.2.
+private:
+  enum {
+    rows_select = product_size_category<Rows,MaxRows>::value,
+    cols_select = product_size_category<Cols,MaxCols>::value,
+    depth_select = product_size_category<Depth,MaxDepth>::value
+  };
+  typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+
+public:
+  enum {
+    value = selector::ret
+  };
+#ifdef EIGEN_DEBUG_PRODUCT
+  static void debug()
+  {
+      EIGEN_DEBUG_VAR(Rows);
+      EIGEN_DEBUG_VAR(Cols);
+      EIGEN_DEBUG_VAR(Depth);
+      EIGEN_DEBUG_VAR(rows_select);
+      EIGEN_DEBUG_VAR(cols_select);
+      EIGEN_DEBUG_VAR(depth_select);
+      EIGEN_DEBUG_VAR(value);
+  }
+#endif
+};
+
+
+/* The following allows to select the kind of product at compile time
+ * based on the three dimensions of the product.
+ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
+// FIXME I'm not sure the current mapping is the ideal one.
+template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
+template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
+template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
+template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
+template<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
+template<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };
+
 } // end namespace internal

+/** \class ProductReturnType
+  * \ingroup Core_Module
+  *
+  * \brief Helper class to get the correct and optimized returned type of operator*
+  *
+  * \param Lhs the type of the left-hand side
+  * \param Rhs the type of the right-hand side
+  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
+  *
+  * This class defines the typename Type representing the optimized product expression
+  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
+  * is the recommended way to define the result type of a function returning an expression
+  * which involve a matrix product. The class Product should never be
+  * used directly.
+  *
+  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+  */
+template<typename Lhs, typename Rhs, int ProductType>
+struct ProductReturnType
+{
+  // TODO use the nested type to reduce instanciations ????
+//   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+//   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+
+  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
+};

 template<typename Lhs, typename Rhs>
-class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
-                                                                            typename internal::traits<Rhs>::StorageKind>::ret>
+struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
 {
+  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
+};
+
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{
+  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
+};
+
+// this is a workaround for sun CC
+template<typename Lhs, typename Rhs>
+struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{};
+
+/***********************************************************************
+*  Implementation of Inner Vector Vector Product
+***********************************************************************/
+
+// FIXME : maybe the "inner product" could return a Scalar
+// instead of a 1x1 matrix ??
+// Pro: more natural for the user
+// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
+// product ends up to a row-vector times col-vector product... To tackle this use
+// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
+
+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
+ : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
+{};
+
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, InnerProduct>
+  : internal::no_assignment_operator,
+    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
+{
+    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
  public:
-    
-    typedef typename ProductImpl<
-        Lhs, Rhs,
-        typename internal::promote_storage_type<typename Lhs::StorageKind,
-                                                typename Rhs::StorageKind>::ret>::Base Base;
-    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
-
-    typedef typename Lhs::Nested LhsNested;
-    typedef typename Rhs::Nested RhsNested;
-    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
-    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
-
-    Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
    {
-      eigen_assert(lhs.cols() == rhs.rows()
-        && "invalid matrix product"
-        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
    }

-    inline Index rows() const { return m_lhs.rows(); }
-    inline Index cols() const { return m_rhs.cols(); }
-
-    const LhsNestedCleaned& lhs() const { return m_lhs; }
-    const RhsNestedCleaned& rhs() const { return m_rhs; }
-
-  protected:
-
-    const LhsNested m_lhs;
-    const RhsNested m_rhs;
+    /** Convertion to scalar */
+    operator const typename Base::Scalar() const {
+      return Base::coeff(0,0);
+    }
 };

+/***********************************************************************
+*  Implementation of Outer Vector Vector Product
+***********************************************************************/
+
+namespace internal {
+template<int StorageOrder> struct outer_product_selector;
+
 template<typename Lhs, typename Rhs>
-class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
-{
-    typedef Product<Lhs, Rhs> Derived;
-  public:
+struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
+{};

-    typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, OuterProduct>
+  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
+{
+  public:
+    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    }
+
+    template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+    {
+      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
+    }
 };

+namespace internal {
+
+template<> struct outer_product_selector<ColMajor> {
+  template<typename ProductType, typename Dest>
+  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
+    typedef typename Dest::Index Index;
+    // FIXME make sure lhs is sequentially stored
+    // FIXME not very good if rhs is real and lhs complex while alpha is real too
+    const Index cols = dest.cols();
+    for (Index j=0; j<cols; ++j)
+      dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
+  }
+};
+
+template<> struct outer_product_selector<RowMajor> {
+  template<typename ProductType, typename Dest>
+  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
+    typedef typename Dest::Index Index;
+    // FIXME make sure rhs is sequentially stored
+    // FIXME not very good if lhs is real and rhs complex while alpha is real too
+    const Index rows = dest.rows();
+    for (Index i=0; i<rows; ++i)
+      dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
+  }
+};
+
+} // end namespace internal
+
+/***********************************************************************
+*  Implementation of General Matrix Vector Product
+***********************************************************************/
+
+/*  According to the shape/flags of the matrix we have to distinghish 3 different cases:
+ *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
+ *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
+ *   3 - all other cases are handled using a simple loop along the outer-storage direction.
+ *  Therefore we need a lower level meta selector.
+ *  Furthermore, if the matrix is the rhs, then the product has to be transposed.
+ */
+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
+{};
+
+template<int Side, int StorageOrder, bool BlasCompatible>
+struct gemv_selector;
+
+} // end namespace internal
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, GemvProduct>
+  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
+{
+  public:
+    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+    typedef typename Lhs::Scalar LhsScalar;
+    typedef typename Rhs::Scalar RhsScalar;
+
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+    {
+//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
+//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    }
+
+    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
+
+    template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+    {
+      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
+      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
+    }
+};
+
+namespace internal {
+
+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    Transpose<Dest> destT(dest);
+    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
+        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
+  }
+};
+
+template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
+{
+  EIGEN_STRONG_INLINE  Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+};
+
+template<typename Scalar,int Size>
+struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
+{
+  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+};
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
+{
+  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
+  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
+};
+
+template<> struct gemv_selector<OnTheRight,ColMajor,true>
+{
+  template<typename ProductType, typename Dest>
+  static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename ProductType::Index Index;
+    typedef typename ProductType::LhsScalar   LhsScalar;
+    typedef typename ProductType::RhsScalar   RhsScalar;
+    typedef typename ProductType::Scalar      ResScalar;
+    typedef typename ProductType::RealScalar  RealScalar;
+    typedef typename ProductType::ActualLhsType ActualLhsType;
+    typedef typename ProductType::ActualRhsType ActualRhsType;
+    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
+
+    const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
+    const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on, the other hand it is good for the cache to pack the vector anyways...
+      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+    };
+
+    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+
+    bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
+    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+    
+    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+
+    ResScalar* actualDestPtr;
+    bool freeDestPtr = false;
+    if (evalToDest)
+    {
+      actualDestPtr = &dest.coeffRef(0);
+    }
+    else
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      int size = dest.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      if((actualDestPtr = static_dest.data())==0)
+      {
+        freeDestPtr = true;
+        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
+      }
+      if(!alphaIsCompatible)
+      {
+        MappedDest(actualDestPtr, dest.size()).setZero();
+        compatibleAlpha = RhsScalar(1);
+      }
+      else
+        MappedDest(actualDestPtr, dest.size()) = dest;
+    }
+
+    general_matrix_vector_product
+      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
+        actualLhs.rows(), actualLhs.cols(),
+        &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
+        actualRhs.data(), actualRhs.innerStride(),
+        actualDestPtr, 1,
+        compatibleAlpha);
+
+    if (!evalToDest)
+    {
+      if(!alphaIsCompatible)
+        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
+      else
+        dest = MappedDest(actualDestPtr, dest.size());
+      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
+    }
+  }
+};
+
+template<> struct gemv_selector<OnTheRight,RowMajor,true>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename ProductType::LhsScalar LhsScalar;
+    typedef typename ProductType::RhsScalar RhsScalar;
+    typedef typename ProductType::Scalar    ResScalar;
+    typedef typename ProductType::Index Index;
+    typedef typename ProductType::ActualLhsType ActualLhsType;
+    typedef typename ProductType::ActualRhsType ActualRhsType;
+    typedef typename ProductType::_ActualRhsType _ActualRhsType;
+    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on, the other hand it is good for the cache to pack the vector anyways...
+      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+    };
+
+    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+
+    RhsScalar* actualRhsPtr;
+    bool freeRhsPtr = false;
+    if (DirectlyUseRhs)
+    {
+      actualRhsPtr = const_cast<RhsScalar*>(&actualRhs.coeffRef(0));
+    }
+    else
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      int size = actualRhs.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      if((actualRhsPtr = static_rhs.data())==0)
+      {
+        freeRhsPtr = true;
+        actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
+      }
+      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+    }
+
+    general_matrix_vector_product
+      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
+        actualLhs.rows(), actualLhs.cols(),
+        &actualLhs.coeffRef(0,0), actualLhs.outerStride(),
+        actualRhsPtr, 1,
+        &dest.coeffRef(0,0), dest.innerStride(),
+        actualAlpha);
+
+    if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
+  }
+};
+
+template<> struct gemv_selector<OnTheRight,ColMajor,false>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename Dest::Index Index;
+    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
+    const Index size = prod.rhs().rows();
+    for(Index k=0; k<size; ++k)
+      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
+  }
+};
+
+template<> struct gemv_selector<OnTheRight,RowMajor,false>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    typedef typename Dest::Index Index;
+    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
+    const Index rows = prod.rows();
+    for(Index i=0; i<rows; ++i)
+      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
+  }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** \returns the matrix product of \c *this and \a other.
+  *
+  * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
+  *
+  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline const typename ProductReturnType<Derived,OtherDerived>::Type
+MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
+{
+  // A note regarding the function declaration: In MSVC, this function will sometimes
+  // not be inlined since DenseStorage is an unwindable object for dynamic
+  // matrices and product types are holding a member to store the result.
+  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
+  enum {
+    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
+                   || OtherDerived::RowsAtCompileTime==Dynamic
+                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+  internal::product_type<Derived,OtherDerived>::debug();
+#endif
+  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+}
+
+/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
+  *
+  * The returned product will behave like any other expressions: the coefficients of the product will be
+  * computed once at a time as requested. This might be useful in some extremely rare cases when only
+  * a small and no coherent fraction of the result's coefficients have to be computed.
+  *
+  * \warning This version of the matrix product can be much much slower. So use it only if you know
+  * what you are doing and that you measured a true speed improvement.
+  *
+  * \sa operator*(const MatrixBase&)
+  */
+template<typename Derived>
+template<typename OtherDerived>
+const typename LazyProductReturnType<Derived,OtherDerived>::Type
+MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
+{
+  enum {
+    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
+                   || OtherDerived::RowsAtCompileTime==Dynamic
+                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+
+  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+}
+
 #endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -115,10 +115,10 @@ class ProductBase : public MatrixBase<Derived>
    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }

    template<typename Dest>
-    inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
+    inline void addTo(Dest& dst) const { scaleAndAddTo(dst,1); }

    template<typename Dest>
-    inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
+    inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-1); }

    template<typename Dest>
    inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
@@ -179,8 +179,8 @@ class ProductBase : public MatrixBase<Derived>

  protected:

-    LhsNested m_lhs;
-    RhsNested m_rhs;
+    const LhsNested m_lhs;
+    const RhsNested m_rhs;

    mutable PlainObject m_result;
 };
@@ -256,16 +256,16 @@ class ScaledProduct
    : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}

    template<typename Dest>
-    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
+    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,m_alpha); }

    template<typename Dest>
-    inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
+    inline void addTo(Dest& dst) const { scaleAndAddTo(dst,m_alpha); }

    template<typename Dest>
-    inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
+    inline void subTo(Dest& dst) const { scaleAndAddTo(dst,-m_alpha); }

    template<typename Dest>
-    inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha * m_alpha); }
+    inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha); }

    const Scalar& alpha() const { return m_alpha; }
    
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -95,7 +95,7 @@ struct redux_novec_unroller

  typedef typename Derived::Scalar Scalar;

-  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+  EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
@@ -112,7 +112,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>

  typedef typename Derived::Scalar Scalar;

-  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
+  EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
  }
@@ -125,7 +125,7 @@ template<typename Func, typename Derived, int Start>
 struct redux_novec_unroller<Func, Derived, Start, 0>
 {
  typedef typename Derived::Scalar Scalar;
-  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
+  EIGEN_STRONG_INLINE static Scalar run(const Derived&, const Func&) { return Scalar(); }
 };

 /*** vectorization ***/
@@ -141,7 +141,7 @@ struct redux_vec_unroller
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;

-  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
+  EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
  {
    return func.packetOp(
            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@@ -162,7 +162,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;

-  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
+  EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
  {
    return mat.template packetByOuterInner<alignment>(outer, inner);
  }
@@ -214,33 +214,20 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
    const Index size = mat.size();
    eigen_assert(size && "you are using an empty matrix");
    const Index packetSize = packet_traits<Scalar>::size;
-    const Index alignedStart = internal::first_aligned(mat);
+    const Index alignedStart = first_aligned(mat);
    enum {
      alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
                ? Aligned : Unaligned
    };
-    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
-    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
-    const Index alignedEnd2 = alignedStart + alignedSize2;
-    const Index alignedEnd  = alignedStart + alignedSize;
+    const Index alignedSize = ((size-alignedStart)/packetSize)*packetSize;
+    const Index alignedEnd = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
-      PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
-      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
-      {
-        PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
-        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
-        {
-          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
-          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
-        }
-
-        packet_res0 = func.packetOp(packet_res0,packet_res1);
-        if(alignedEnd>alignedEnd2)
-          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
-      }
-      res = func.predux(packet_res0);
+      PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
+      for(Index index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
+        packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
+      res = func.predux(packet_res);

      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));
@@ -309,7 +296,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
-  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+  EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -122,13 +122,9 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
      return m_matrix.template packet<LoadMode>(actual_row, actual_col);
    }

-    const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const 
-    { 
-      return m_matrix; 
-    }

  protected:
-    typename MatrixType::Nested m_matrix;
+    const typename MatrixType::Nested m_matrix;
    const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
    const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
 };
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -183,14 +183,8 @@ template<typename MatrixType, int Direction> class Reverse
      m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
    }

-    const typename internal::remove_all<typename MatrixType::Nested>::type& 
-    nestedExpression() const 
-    {
-      return m_matrix;
-    }
-
  protected:
-    typename MatrixType::Nested m_matrix;
+    const typename MatrixType::Nested m_matrix;
 };

 /** \returns an expression of the reverse of *this.
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -101,25 +101,10 @@ class Select : internal::no_assignment_operator,
        return m_else.coeff(i);
    }

-    const ConditionMatrixType& conditionMatrix() const
-    {
-      return m_condition;
-    }
-
-    const ThenMatrixType& thenMatrix() const
-    {
-      return m_then;
-    }
-
-    const ElseMatrixType& elseMatrix() const
-    {
-      return m_else;
-    }
-
  protected:
-    typename ConditionMatrixType::Nested m_condition;
-    typename ThenMatrixType::Nested m_then;
-    typename ElseMatrixType::Nested m_else;
+    const typename ConditionMatrixType::Nested m_condition;
+    const typename ThenMatrixType::Nested m_then;
+    const typename ElseMatrixType::Nested m_else;
 };


--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -32,13 +32,13 @@
  * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
  *
  * \param MatrixType the type of the dense matrix storing the coefficients
-  * \param TriangularPart can be either \c #Lower or \c #Upper
+  * \param TriangularPart can be either \c Lower or \c Upper
  *
  * This class is an expression of a sefladjoint matrix from a triangular part of a matrix
  * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
  * and most of the time this is the only way that it is used.
  *
-  * \sa class TriangularBase, MatrixBase::selfadjointView()
+  * \sa class TriangularBase, MatrixBase::selfAdjointView()
  */

 namespace internal {
@@ -82,7 +82,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
    };
    typedef typename MatrixType::PlainObject PlainObject;

-    inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
+    inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
    {}

    inline Index rows() const { return m_matrix.rows(); }
@@ -199,7 +199,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
    #endif

  protected:
-    MatrixTypeNested m_matrix;
+    const MatrixTypeNested m_matrix;
 };


@@ -222,7 +222,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
  };

-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);

@@ -236,7 +236,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
 {
-  static inline void run(Derived1 &, const Derived2 &) {}
+  inline static void run(Derived1 &, const Derived2 &) {}
 };

 template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
@@ -247,7 +247,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
  };

-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);

@@ -261,14 +261,14 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
 {
-  static inline void run(Derived1 &, const Derived2 &) {}
+  inline static void run(Derived1 &, const Derived2 &) {}
 };

 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
@@ -285,7 +285,7 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
 {
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
  typedef typename Derived1::Index Index;
    for(Index i = 0; i < dst.rows(); ++i)
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -163,16 +163,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
      return Base::operator=(rhs);
    }

-    Lhs& expression() const 
-    { 
-      return m_matrix;
-    }
-
-    const BinaryOp& functor() const 
-    { 
-      return m_functor;
-    }
-
  protected:
    Lhs& m_matrix;
    const BinaryOp& m_functor;
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -74,19 +74,26 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
    // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1

    bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
-
-    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
-                                                  (useRhsDirectly ? rhs.data() : 0));
-                                                  
-    if(!useRhsDirectly)
+    RhsScalar* actualRhs;
+    if(useRhsDirectly)
+    {
+      actualRhs = &rhs.coeffRef(0);
+    }
+    else
+    {
+      actualRhs = ei_aligned_stack_new(RhsScalar,rhs.size());
      MappedRhs(actualRhs,rhs.size()) = rhs;
+    }

    triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
                            (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
      ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);

    if(!useRhsDirectly)
+    {
      rhs = MappedRhs(actualRhs, rhs.size());
+      ei_aligned_stack_delete(RhsScalar, actualRhs, rhs.size());
+    }
  }
 };

@@ -100,7 +107,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
  typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
  static void run(const Lhs& lhs, Rhs& rhs)
  {
-    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
+    const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
    triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
                               (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
      ::run(lhs.rows(), Side==OnTheLeft? rhs.cols() : rhs.rows(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride());
@@ -177,8 +184,10 @@ template<int Side, typename OtherDerived>
 void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
 {
  OtherDerived& other = _other.const_cast_derived();
-  eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
-  eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
+  eigen_assert(cols() == rows());
+  eigen_assert( (Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols()) );
+  eigen_assert(!(Mode & ZeroDiag));
+  eigen_assert(Mode & (Upper|Lower));

  enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit  && OtherDerived::IsVectorAtCompileTime };
  typedef typename internal::conditional<copy,
@@ -253,7 +262,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv

  protected:
    const TriangularType& m_triangularMatrix;
-    typename Rhs::Nested m_rhs;
+    const typename Rhs::Nested m_rhs;
 };

 } // namespace internal
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -56,11 +56,10 @@ template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::stableNorm() const
 {
-  using std::min;
  const Index blockSize = 4096;
-  RealScalar scale(0);
-  RealScalar invScale(1);
-  RealScalar ssq(0); // sum of square
+  RealScalar scale = 0;
+  RealScalar invScale = 1;
+  RealScalar ssq = 0; // sum of square
  enum {
    Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
  };
@@ -69,7 +68,7 @@ MatrixBase<Derived>::stableNorm() const
  if (bi>0)
    internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
  for (; bi<n; bi+=blockSize)
-    internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
+    internal::stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
  return scale * internal::sqrt(ssq);
 }

@@ -86,9 +85,6 @@ template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::blueNorm() const
 {
-  using std::pow;
-  using std::min;
-  using std::max;
  static Index nmax = -1;
  static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
  if(nmax <= 0)
@@ -103,25 +99,25 @@ MatrixBase<Derived>::blueNorm() const
    // For portability, the PORT subprograms "ilmaeh" and "rlmach"
    // are used. For any specific computer, each of the assignment
    // statements can be replaced
-    nbig  = (std::numeric_limits<Index>::max)();            // largest integer
+    nbig  = std::numeric_limits<Index>::max();            // largest integer
    ibeta = std::numeric_limits<RealScalar>::radix;         // base for floating-point numbers
    it    = std::numeric_limits<RealScalar>::digits;        // number of base-beta digits in mantissa
    iemin = std::numeric_limits<RealScalar>::min_exponent;  // minimum exponent
    iemax = std::numeric_limits<RealScalar>::max_exponent;  // maximum exponent
-    rbig  = (std::numeric_limits<RealScalar>::max)();         // largest floating-point number
+    rbig  = std::numeric_limits<RealScalar>::max();         // largest floating-point number

    iexp  = -((1-iemin)/2);
-    b1    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));  // lower boundary of midrange
+    b1    = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));  // lower boundary of midrange
    iexp  = (iemax + 1 - it)/2;
-    b2    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // upper boundary of midrange
+    b2    = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));   // upper boundary of midrange

    iexp  = (2-iemin)/2;
-    s1m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for lower range
+    s1m   = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for lower range
    iexp  = - ((iemax+it)/2);
-    s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for upper range
+    s2m   = RealScalar(std::pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for upper range

    overfl  = rbig*s2m;             // overflow boundary for abig
-    eps     = RealScalar(pow(double(ibeta), 1-it));
+    eps     = RealScalar(std::pow(double(ibeta), 1-it));
    relerr  = internal::sqrt(eps);         // tolerance for neglecting asml
    abig    = RealScalar(1.0/eps - 1.0);
    if (RealScalar(nbig)>abig)  nmax = int(abig);  // largest safe n
@@ -167,8 +163,8 @@ MatrixBase<Derived>::blueNorm() const
  }
  else
    return internal::sqrt(amed);
-  asml = (min)(abig, amed);
-  abig = (max)(abig, amed);
+  asml = std::min(abig, amed);
+  abig = std::max(abig, amed);
  if(asml <= abig*relerr)
    return abig;
  else
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -52,15 +52,6 @@ template<typename ExpressionType> class SwapWrapper
    inline Index cols() const { return m_expression.cols(); }
    inline Index outerStride() const { return m_expression.outerStride(); }
    inline Index innerStride() const { return m_expression.innerStride(); }
-    
-    typedef typename internal::conditional<
-                       internal::is_lvalue<ExpressionType>::value,
-                       Scalar,
-                       const Scalar
-                     >::type ScalarWithConstIfNotLvalue;
-                     
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
-    inline const Scalar* data() const { return m_expression.data(); }

    inline Scalar& coeffRef(Index row, Index col)
    {
@@ -128,8 +119,6 @@ template<typename ExpressionType> class SwapWrapper
      _other.template writePacket<LoadMode>(index, tmp);
    }

-    ExpressionType& expression() const { return m_expression; }
-
  protected:
    ExpressionType& m_expression;
 };
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -91,7 +91,7 @@ template<typename MatrixType> class Transpose
    nestedExpression() { return m_matrix.const_cast_derived(); }

  protected:
-    typename MatrixType::Nested m_matrix;
+    const typename MatrixType::Nested m_matrix;
 };

 namespace internal {
@@ -152,12 +152,12 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
      return derived().nestedExpression().coeffRef(index);
    }

-    inline CoeffReturnType coeff(Index row, Index col) const
+    inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return derived().nestedExpression().coeff(col, row);
    }

-    inline CoeffReturnType coeff(Index index) const
+    inline const CoeffReturnType coeff(Index index) const
    {
      return derived().nestedExpression().coeff(index);
    }
@@ -350,14 +350,15 @@ struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
 template<bool DestIsTransposed, typename OtherDerived>
 struct check_transpose_aliasing_compile_time_selector
 {
-  enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
+  enum { ret = blas_traits<OtherDerived>::IsTransposed != DestIsTransposed
+  };
 };

 template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
 struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
 {
-  enum { ret =    bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
-               || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
+  enum { ret =    blas_traits<DerivedA>::IsTransposed != DestIsTransposed
+               || blas_traits<DerivedB>::IsTransposed != DestIsTransposed
  };
 };

@@ -366,7 +367,7 @@ struct check_transpose_aliasing_run_time_selector
 {
  static bool run(const Scalar* dest, const OtherDerived& src)
  {
-    return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
+    return (blas_traits<OtherDerived>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(Scalar*)extract_data(src));
  }
 };

--- a/Eigen/src/Core/Transpositions.h
+++ b/Eigen/src/Core/Transpositions.h
@@ -404,7 +404,7 @@ struct transposition_matrix_product_retval

  protected:
    const TranspositionType& m_transpositions;
-    typename MatrixType::Nested m_matrix;
+    const typename MatrixType::Nested m_matrix;
 };

 } // end namespace internal
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -111,7 +111,6 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
      EIGEN_ONLY_USED_FOR_DEBUG(col);
      eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
      const int mode = int(Mode) & ~SelfAdjoint;
-      EIGEN_ONLY_USED_FOR_DEBUG(mode);
      eigen_assert((mode==Upper && col>=row)
                || (mode==Lower && col<=row)
                || ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
@@ -135,13 +134,13 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
  * \brief Base class for triangular part in a matrix
  *
  * \param MatrixType the type of the object in which we are taking the triangular part
-  * \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
-  *             #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
-  *             This is in fact a bit field; it must have either #Upper or #Lower, 
-  *             and additionnaly it may have #UnitDiag or #ZeroDiag or neither.
+  * \param Mode the kind of triangular matrix expression to construct. Can be Upper,
+  *             Lower, UpperSelfadjoint, or LowerSelfadjoint. This is in fact a bit field;
+  *             it must have either Upper or Lower, and additionnaly it may have either
+  *             UnitDiag or Selfadjoint.
  *
  * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
-  * matrices one should speak of "trapezoid" parts. This class is the return type
+  * matrices one should speak ok "trapezoid" parts. This class is the return type
  * of MatrixBase::triangularView() and most of the time this is the only way it is used.
  *
  * \sa MatrixBase::triangularView()
@@ -273,8 +272,11 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
    { return m_matrix.conjugate(); }

+    /** \sa MatrixBase::adjoint() */
+    inline TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint()
+    { return m_matrix.adjoint(); }
    /** \sa MatrixBase::adjoint() const */
-    inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
+    inline const TriangularView<typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
    { return m_matrix.adjoint(); }

    /** \sa MatrixBase::transpose() */
@@ -285,13 +287,11 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    }
    /** \sa MatrixBase::transpose() const */
    inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
-    {
-      return m_matrix.transpose();
-    }
+    { return m_matrix.transpose(); }

    /** Efficient triangular matrix times vector/matrix product */
    template<typename OtherDerived>
-    TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
+    TriangularProduct<Mode,true,MatrixType,false,OtherDerived,OtherDerived::IsVectorAtCompileTime>
    operator*(const MatrixBase<OtherDerived>& rhs) const
    {
      return TriangularProduct
@@ -374,8 +374,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    template<typename OtherDerived>
    void swap(MatrixBase<OtherDerived> const & other)
    {
-      SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
-      TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
+      TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
    }

    Scalar determinant() const
@@ -433,7 +432,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
    template<typename ProductDerived, typename Lhs, typename Rhs>
    EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);

-    MatrixTypeNested m_matrix;
+    const MatrixTypeNested m_matrix;
 };

 /***************************************************************************
@@ -449,10 +448,8 @@ struct triangular_assignment_selector
    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
  };
-  
-  typedef typename Derived1::Scalar Scalar;

-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);

@@ -469,9 +466,9 @@ struct triangular_assignment_selector
    else if(ClearOpposite)
    {
      if (Mode&UnitDiag && row==col)
-        dst.coeffRef(row, col) = Scalar(1);
+        dst.coeffRef(row, col) = 1;
      else
-        dst.coeffRef(row, col) = Scalar(0);
+        dst.coeffRef(row, col) = 0;
    }
  }
 };
@@ -480,24 +477,23 @@ struct triangular_assignment_selector
 template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
 {
-  static inline void run(Derived1 &, const Derived2 &) {}
+  inline static void run(Derived1 &, const Derived2 &) {}
 };

 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
-  typedef typename Derived1::Scalar Scalar;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = (std::min)(j, dst.rows()-1);
+      Index maxi = std::min(j, dst.rows()-1);
      for(Index i = 0; i <= maxi; ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
        for(Index i = maxi+1; i < dst.rows(); ++i)
-          dst.coeffRef(i, j) = Scalar(0);
+          dst.coeffRef(i, j) = 0;
    }
  }
 };
@@ -506,16 +502,16 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
      for(Index i = j; i < dst.rows(); ++i)
        dst.copyCoeff(i, j, src);
-      Index maxi = (std::min)(j, dst.rows());
+      Index maxi = std::min(j, dst.rows());
      if (ClearOpposite)
        for(Index i = 0; i < maxi; ++i)
-          dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
+          dst.coeffRef(i, j) = 0;
    }
  }
 };
@@ -524,11 +520,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = (std::min)(j, dst.rows());
+      Index maxi = std::min(j, dst.rows());
      for(Index i = 0; i < maxi; ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
@@ -542,16 +538,16 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
      for(Index i = j+1; i < dst.rows(); ++i)
        dst.copyCoeff(i, j, src);
-      Index maxi = (std::min)(j, dst.rows()-1);
+      Index maxi = std::min(j, dst.rows()-1);
      if (ClearOpposite)
        for(Index i = 0; i <= maxi; ++i)
-          dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
+          dst.coeffRef(i, j) = 0;
    }
  }
 };
@@ -560,11 +556,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = (std::min)(j, dst.rows());
+      Index maxi = std::min(j, dst.rows());
      for(Index i = 0; i < maxi; ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
@@ -580,11 +576,11 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
 {
  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(Index j = 0; j < dst.cols(); ++j)
    {
-      Index maxi = (std::min)(j, dst.rows());
+      Index maxi = std::min(j, dst.rows());
      for(Index i = maxi+1; i < dst.rows(); ++i)
        dst.copyCoeff(i, j, src);
      if (ClearOpposite)
@@ -760,8 +756,8 @@ typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Deriv
 /**
  * \returns an expression of a triangular view extracted from the current matrix
  *
-  * The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
-  * \c #Lower, \c #StrictlyLower, \c #UnitLower.
+  * The parameter \a Mode can have the following values: \c Upper, \c StrictlyUpper, \c UnitUpper,
+  * \c Lower, \c StrictlyLower, \c UnitLower.
  *
  * Example: \include MatrixBase_extract.cpp
  * Output: \verbinclude MatrixBase_extract.out
@@ -796,7 +792,7 @@ bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
  RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
  for(Index j = 0; j < cols(); ++j)
  {
-    Index maxi = (std::min)(j, rows()-1);
+    Index maxi = std::min(j, rows()-1);
    for(Index i = 0; i <= maxi; ++i)
    {
      RealScalar absValue = internal::abs(coeff(i,j));
@@ -828,7 +824,7 @@ bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
  RealScalar threshold = maxAbsOnLowerPart * prec;
  for(Index j = 1; j < cols(); ++j)
  {
-    Index maxi = (std::min)(j, rows()-1);
+    Index maxi = std::min(j, rows()-1);
    for(Index i = 0; i < maxi; ++i)
      if(internal::abs(coeff(i, j)) > threshold) return false;
  }
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -31,9 +31,9 @@
  *
  * \brief Generic expression of a partially reduxed matrix
  *
-  * \tparam MatrixType the type of the matrix we are applying the redux operation
-  * \tparam MemberOp type of the member functor
-  * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
+  * \param MatrixType the type of the matrix we are applying the redux operation
+  * \param MemberOp type of the member functor
+  * \param Direction indicates the direction of the redux (Vertical or Horizontal)
  *
  * This class represents an expression of a partial redux operator of a matrix.
  * It is the return type of some VectorwiseOp functions,
@@ -110,7 +110,7 @@ class PartialReduxExpr : internal::no_assignment_operator,
    }

  protected:
-    MatrixTypeNested m_matrix;
+    const MatrixTypeNested m_matrix;
    const MemberOp m_functor;
 };

@@ -164,7 +164,7 @@ struct member_redux {
  * \brief Pseudo expression providing partial reduction operations
  *
  * \param ExpressionType the type of the object on which to do partial reductions
-  * \param Direction indicates the direction of the redux (#Vertical or #Horizontal)
+  * \param Direction indicates the direction of the redux (Vertical or Horizontal)
  *
  * This class represents a pseudo expression with partial reduction features.
  * It is the return type of DenseBase::colwise() and DenseBase::rowwise()
@@ -237,10 +237,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    typename ExtendedType<OtherDerived>::Type
    extendedTo(const DenseBase<OtherDerived>& other) const
    {
-      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1),
-                          YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
-      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1),
-                          YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
      return typename ExtendedType<OtherDerived>::Type
                      (other.derived(),
                       Direction==Vertical   ? 1 : m_matrix.rows(),
@@ -421,9 +418,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    ExpressionType& operator=(const DenseBase<OtherDerived>& other)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
      //eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
-      return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
+      for(Index j=0; j<subVectors(); ++j)
+        subVector(j) = other;
+      return const_cast<ExpressionType&>(m_matrix);
    }

    /** Adds the vector \a other to each subvector of \c *this */
@@ -431,8 +429,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
-      return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
+      for(Index j=0; j<subVectors(); ++j)
+        subVector(j) += other.derived();
+      return const_cast<ExpressionType&>(m_matrix);
    }

    /** Substracts the vector \a other to each subvector of \c *this */
@@ -440,29 +439,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
-      return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
-    }
-
-    /** Multiples each subvector of \c *this by the vector \a other */
-    template<typename OtherDerived>
-    ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
-      m_matrix *= extendedTo(other.derived());
-      return const_cast<ExpressionType&>(m_matrix);
-    }
-
-    /** Divides each subvector of \c *this by the vector \a other */
-    template<typename OtherDerived>
-    ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
-      m_matrix /= extendedTo(other.derived());
+      for(Index j=0; j<subVectors(); ++j)
+        subVector(j) -= other.derived();
      return const_cast<ExpressionType&>(m_matrix);
    }

@@ -473,8 +451,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                  const typename ExtendedType<OtherDerived>::Type>
    operator+(const DenseBase<OtherDerived>& other) const
    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
      return m_matrix + extendedTo(other.derived());
    }

@@ -485,39 +462,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                  const typename ExtendedType<OtherDerived>::Type>
    operator-(const DenseBase<OtherDerived>& other) const
    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived);
      return m_matrix - extendedTo(other.derived());
    }

-    /** Returns the expression where each subvector is the product of the vector \a other
-      * by the corresponding subvector of \c *this */
-    template<typename OtherDerived> EIGEN_STRONG_INLINE
-    CwiseBinaryOp<internal::scalar_product_op<Scalar>,
-                  const ExpressionTypeNestedCleaned,
-                  const typename ExtendedType<OtherDerived>::Type>
-    operator*(const DenseBase<OtherDerived>& other) const
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
-      return m_matrix * extendedTo(other.derived());
-    }
-
-    /** Returns the expression where each subvector is the quotient of the corresponding
-      * subvector of \c *this by the vector \a other */
-    template<typename OtherDerived>
-    CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
-                  const ExpressionTypeNestedCleaned,
-                  const typename ExtendedType<OtherDerived>::Type>
-    operator/(const DenseBase<OtherDerived>& other) const
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
-      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
-      return m_matrix / extendedTo(other.derived());
-    }
-
 /////////// Geometry module ///////////

    #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
@@ -561,7 +509,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
  * Example: \include MatrixBase_colwise.cpp
  * Output: \verbinclude MatrixBase_colwise.out
  *
-  * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+  * \sa rowwise(), class VectorwiseOp
  */
 template<typename Derived>
 inline const typename DenseBase<Derived>::ConstColwiseReturnType
@@ -572,7 +520,7 @@ DenseBase<Derived>::colwise() const

 /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
  *
-  * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+  * \sa rowwise(), class VectorwiseOp
  */
 template<typename Derived>
 inline typename DenseBase<Derived>::ColwiseReturnType
@@ -586,7 +534,7 @@ DenseBase<Derived>::colwise()
  * Example: \include MatrixBase_rowwise.cpp
  * Output: \verbinclude MatrixBase_rowwise.out
  *
-  * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+  * \sa colwise(), class VectorwiseOp
  */
 template<typename Derived>
 inline const typename DenseBase<Derived>::ConstRowwiseReturnType
@@ -597,7 +545,7 @@ DenseBase<Derived>::rowwise() const

 /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
  *
-  * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+  * \sa colwise(), class VectorwiseOp
  */
 template<typename Derived>
 inline typename DenseBase<Derived>::RowwiseReturnType
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -35,7 +35,7 @@ struct visitor_impl
    row = (UnrollCount-1) % Derived::RowsAtCompileTime
  };

-  static inline void run(const Derived &mat, Visitor& visitor)
+  inline static void run(const Derived &mat, Visitor& visitor)
  {
    visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
    visitor(mat.coeff(row, col), row, col);
@@ -45,7 +45,7 @@ struct visitor_impl
 template<typename Visitor, typename Derived>
 struct visitor_impl<Visitor, Derived, 1>
 {
-  static inline void run(const Derived &mat, Visitor& visitor)
+  inline static void run(const Derived &mat, Visitor& visitor)
  {
    return visitor.init(mat.coeff(0, 0), 0, 0);
  }
@@ -55,7 +55,7 @@ template<typename Visitor, typename Derived>
 struct visitor_impl<Visitor, Derived, Dynamic>
 {
  typedef typename Derived::Index Index;
-  static inline void run(const Derived& mat, Visitor& visitor)
+  inline static void run(const Derived& mat, Visitor& visitor)
  {
    visitor.init(mat.coeff(0,0), 0, 0);
    for(Index i = 1; i < mat.rows(); ++i)
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -168,7 +168,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
 template<int Offset>
 struct palign_impl<Offset,Packet2cf>
 {
-  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset==1)
    {
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -487,7 +487,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
 template<int Offset>
 struct palign_impl<Offset,Packet4f>
 {
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
  {
    if (Offset!=0)
      first = vec_sld(first, second, Offset*4);
@@ -497,7 +497,7 @@ struct palign_impl<Offset,Packet4f>
 template<int Offset>
 struct palign_impl<Offset,Packet4i>
 {
-  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
  {
    if (Offset!=0)
      first = vec_sld(first, second, Offset*4);
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -27,8 +27,8 @@

 namespace internal {

-static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
-static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
+static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };

 //---------- float ----------
 struct Packet2cf
@@ -43,7 +43,6 @@ template<> struct packet_traits<std::complex<float> >  : default_packet_traits
  typedef Packet2cf type;
  enum {
    Vectorizable = 1,
-    AlignedOnScalar = 1,
    size = 2,

    HasAdd    = 1,
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -52,16 +52,6 @@ typedef uint32x4_t  Packet4ui;
 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
  const Packet4i p4i_##NAME = pset1<Packet4i>(X)

-#if defined(__llvm__) && !defined(__clang__)
-  //Special treatment for Apple's llvm-gcc, its NEON packet types are unions
-  #define EIGEN_INIT_NEON_PACKET2(X, Y)       {{X, Y}}
-  #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
-#else
-  //Default initializer for packets
-  #define EIGEN_INIT_NEON_PACKET2(X, Y)       {X, Y}
-  #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
-#endif
-    
 #ifndef __pld
 #define __pld(x) asm volatile ( "   pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
 #endif
@@ -94,7 +84,7 @@ template<> struct packet_traits<int>    : default_packet_traits
  };
 };

-#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__)
+#if EIGEN_GNUC_AT_MOST(4,4)
 // workaround gcc 4.2, 4.3 and 4.4 compilatin issue
 EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
 EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
@@ -110,12 +100,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from)   {

 template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a)
 {
-  Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
+  Packet4f countdown = { 3, 2, 1, 0 };
  return vaddq_f32(pset1<Packet4f>(a), countdown);
 }
 template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a)
 {
-  Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
+  Packet4i countdown = { 3, 2, 1, 0 };
  return vaddq_s32(pset1<Packet4i>(a), countdown);
 }

@@ -201,14 +191,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)
 {
  float32x2_t lo, hi;
  lo = vdup_n_f32(*from);
-  hi = vdup_n_f32(*(from+1));
+  hi = vdup_n_f32(*from);
  return vcombine_f32(lo, hi);
 }
 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)
 {
  int32x2_t lo, hi;
  lo = vdup_n_s32(*from);
-  hi = vdup_n_s32(*(from+1));
+  hi = vdup_n_s32(*from);
  return vcombine_s32(lo, hi);
 }

@@ -405,29 +395,25 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
  return s[0];
 }

-// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
-// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
-#define PALIGN_NEON(Offset,Type,Command) \
-template<>\
-struct palign_impl<Offset,Type>\
-{\
-    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
-    {\
-        if (Offset!=0)\
-            first = Command(first, second, Offset);\
-    }\
-};\
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
+  {
+    if (Offset!=0)
+      first = vextq_f32(first, second, Offset);
+  }
+};

-PALIGN_NEON(0,Packet4f,vextq_f32)
-PALIGN_NEON(1,Packet4f,vextq_f32)
-PALIGN_NEON(2,Packet4f,vextq_f32)
-PALIGN_NEON(3,Packet4f,vextq_f32)
-PALIGN_NEON(0,Packet4i,vextq_s32)
-PALIGN_NEON(1,Packet4i,vextq_s32)
-PALIGN_NEON(2,Packet4i,vextq_s32)
-PALIGN_NEON(3,Packet4i,vextq_s32)
-    
-#undef PALIGN_NEON
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
+  {
+    if (Offset!=0)
+      first = vextq_s32(first, second, Offset);
+  }
+};

 } // end namespace internal

--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -102,7 +102,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo
  Packet2cf res;
  #if EIGEN_GNUC_AT_MOST(4,2)
  // workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
-  res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
+  res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)&from);
  #else
  res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
  #endif
@@ -151,7 +151,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
 template<int Offset>
 struct palign_impl<Offset,Packet2cf>
 {
-  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset==1)
    {
@@ -350,7 +350,7 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
 template<int Offset>
 struct palign_impl<Offset,Packet1cd>
 {
-  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+  EIGEN_STRONG_INLINE static void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
  {
    // FIXME is it sure we never have to align a Packet1cd?
    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -110,18 +110,9 @@ template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4}
 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
 template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4}; };

-#if defined(_MSC_VER) && (_MSC_VER==1500)
-// Workaround MSVC 9 internal compiler error.
-// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
-// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
-template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }
-template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
-template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
-#else
 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set1_ps(from); }
 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }
-#endif

 template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
 template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
@@ -291,7 +282,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)

 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)
 {
-  return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+  return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);
 }
 template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*  from)
 { return pset1<Packet2d>(from[0]); }
@@ -311,8 +302,8 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
  _mm_storel_pd((to), from);
  _mm_storeh_pd((to+1), from);
 }
-template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
-template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*      to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castps_pd(from)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*      to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, _mm_castsi128_pd(from)); }

 // some compilers might be tempted to perform multiple moves instead of using a vector path.
 template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
@@ -550,7 +541,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
 template<int Offset>
 struct palign_impl<Offset,Packet4f>
 {
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
  {
    if (Offset!=0)
      first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
@@ -560,7 +551,7 @@ struct palign_impl<Offset,Packet4f>
 template<int Offset>
 struct palign_impl<Offset,Packet4i>
 {
-  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
  {
    if (Offset!=0)
      first = _mm_alignr_epi8(second,first, Offset*4);
@@ -570,7 +561,7 @@ struct palign_impl<Offset,Packet4i>
 template<int Offset>
 struct palign_impl<Offset,Packet2d>
 {
-  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset==1)
      first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
@@ -581,7 +572,7 @@ struct palign_impl<Offset,Packet2d>
 template<int Offset>
 struct palign_impl<Offset,Packet4f>
 {
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
  {
    if (Offset==1)
    {
@@ -604,7 +595,7 @@ struct palign_impl<Offset,Packet4f>
 template<int Offset>
 struct palign_impl<Offset,Packet4i>
 {
-  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
  {
    if (Offset==1)
    {
@@ -627,7 +618,7 @@ struct palign_impl<Offset,Packet4i>
 template<int Offset>
 struct palign_impl<Offset,Packet2d>
 {
-  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset==1)
    {
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -224,8 +224,8 @@ class CoeffBasedProduct
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }

  protected:
-    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
-    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
+    const LhsNested m_lhs;
+    const RhsNested m_rhs;

    mutable PlainObject m_result;
 };
@@ -252,7 +252,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
    res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -263,7 +263,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
  }
@@ -273,7 +273,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
  {
    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -291,7 +291,7 @@ struct product_coeff_vectorized_unroller
 {
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -302,7 +302,7 @@ template<typename Lhs, typename Rhs, typename Packet>
 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
  }
@@ -314,7 +314,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
  typedef typename Lhs::PacketScalar Packet;
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    Packet pres;
    product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
@@ -327,7 +327,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
 struct product_coeff_vectorized_dyn_selector
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
  }
@@ -339,7 +339,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
  }
@@ -349,7 +349,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
  }
@@ -359,7 +359,7 @@ template<typename Lhs, typename Rhs>
 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.transpose().cwiseProduct(rhs).sum();
  }
@@ -369,7 +369,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
  }
@@ -383,7 +383,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -394,7 +394,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -405,7 +405,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
  }
@@ -415,7 +415,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
  }
@@ -425,7 +425,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -438,7 +438,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
+  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -30,18 +30,19 @@ namespace internal {
 template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
 class gebp_traits;

-
-/** \internal \returns b if a<=0, and returns a otherwise. */
-inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
-{
-  return a<=0 ? b : a;
-}
-
 /** \internal */
 inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
 {
-  static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
-  static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
+  static std::ptrdiff_t m_l1CacheSize = 0;
+  static std::ptrdiff_t m_l2CacheSize = 0;
+  if(m_l1CacheSize==0)
+  {
+    m_l1CacheSize = queryL1CacheSize();
+    m_l2CacheSize = queryTopLevelCacheSize();
+
+    if(m_l1CacheSize<=0) m_l1CacheSize = 8 * 1024;
+    if(m_l2CacheSize<=0) m_l2CacheSize = 1 * 1024 * 1024;
+  }

  if(action==SetAction)
  {
@@ -80,7 +81,6 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
 template<typename LhsScalar, typename RhsScalar, int KcFactor>
 void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
 {
-  EIGEN_UNUSED_VARIABLE(n);
  // Explanations:
  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
@@ -102,6 +102,7 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
  k = std::min<std::ptrdiff_t>(k, l1/kdiv);
  std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
  if(_m<m) m = _m & mr_mask;
+  n = n;
 }

 template<typename LhsScalar, typename RhsScalar>
@@ -117,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
  // FIXME (a bit overkill maybe ?)

  template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
-    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
+    EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
    {
      c = cj.pmadd(a,b,c);
    }
  };

  template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
-    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
+    EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
    {
      t = b; t = cj.pmul(a,t); c = padd(c,t);
    }
@@ -535,7 +536,7 @@ struct gebp_kernel
    ResPacketSize = Traits::ResPacketSize
  };

-  EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
+  EIGEN_FLATTEN_ATTRIB
  void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
  {
@@ -597,64 +598,64 @@ struct gebp_kernel
          if(nr==2)
          {
            LhsPacket A0, A1;
-            RhsPacket B_0;
+            RhsPacket B0;
            RhsPacket T0;
            
 EIGEN_ASM_COMMENT("mybegin2");
            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
-            traits.loadRhs(&blB[1*RhsProgress], B_0);
-            traits.madd(A0,B_0,C1,T0);
-            traits.madd(A1,B_0,C5,B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
+            traits.loadRhs(&blB[1*RhsProgress], B0);
+            traits.madd(A0,B0,C1,T0);
+            traits.madd(A1,B0,C5,B0);

            traits.loadLhs(&blA[2*LhsProgress], A0);
            traits.loadLhs(&blA[3*LhsProgress], A1);
-            traits.loadRhs(&blB[2*RhsProgress], B_0);
-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
-            traits.loadRhs(&blB[3*RhsProgress], B_0);
-            traits.madd(A0,B_0,C1,T0);
-            traits.madd(A1,B_0,C5,B_0);
+            traits.loadRhs(&blB[2*RhsProgress], B0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
+            traits.loadRhs(&blB[3*RhsProgress], B0);
+            traits.madd(A0,B0,C1,T0);
+            traits.madd(A1,B0,C5,B0);

            traits.loadLhs(&blA[4*LhsProgress], A0);
            traits.loadLhs(&blA[5*LhsProgress], A1);
-            traits.loadRhs(&blB[4*RhsProgress], B_0);
-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
-            traits.loadRhs(&blB[5*RhsProgress], B_0);
-            traits.madd(A0,B_0,C1,T0);
-            traits.madd(A1,B_0,C5,B_0);
+            traits.loadRhs(&blB[4*RhsProgress], B0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
+            traits.loadRhs(&blB[5*RhsProgress], B0);
+            traits.madd(A0,B0,C1,T0);
+            traits.madd(A1,B0,C5,B0);

            traits.loadLhs(&blA[6*LhsProgress], A0);
            traits.loadLhs(&blA[7*LhsProgress], A1);
-            traits.loadRhs(&blB[6*RhsProgress], B_0);
-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
-            traits.loadRhs(&blB[7*RhsProgress], B_0);
-            traits.madd(A0,B_0,C1,T0);
-            traits.madd(A1,B_0,C5,B_0);
+            traits.loadRhs(&blB[6*RhsProgress], B0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
+            traits.loadRhs(&blB[7*RhsProgress], B0);
+            traits.madd(A0,B0,C1,T0);
+            traits.madd(A1,B0,C5,B0);
 EIGEN_ASM_COMMENT("myend");
          }
          else
          {
 EIGEN_ASM_COMMENT("mybegin4");
            LhsPacket A0, A1;
-            RhsPacket B_0, B1, B2, B3;
+            RhsPacket B0, B1, B2, B3;
            RhsPacket T0;
            
            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
            traits.loadRhs(&blB[1*RhsProgress], B1);

-            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A0,B0,C0,T0);
            traits.loadRhs(&blB[2*RhsProgress], B2);
-            traits.madd(A1,B_0,C4,B_0);
+            traits.madd(A1,B0,C4,B0);
            traits.loadRhs(&blB[3*RhsProgress], B3);
-            traits.loadRhs(&blB[4*RhsProgress], B_0);
+            traits.loadRhs(&blB[4*RhsProgress], B0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.loadRhs(&blB[5*RhsProgress], B1);
@@ -666,9 +667,9 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.madd(A1,B3,C7,B3);
            traits.loadLhs(&blA[3*LhsProgress], A1);
            traits.loadRhs(&blB[7*RhsProgress], B3);
-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
-            traits.loadRhs(&blB[8*RhsProgress], B_0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
+            traits.loadRhs(&blB[8*RhsProgress], B0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.loadRhs(&blB[9*RhsProgress], B1);
@@ -681,9 +682,9 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.loadLhs(&blA[5*LhsProgress], A1);
            traits.loadRhs(&blB[11*RhsProgress], B3);

-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
-            traits.loadRhs(&blB[12*RhsProgress], B_0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
+            traits.loadRhs(&blB[12*RhsProgress], B0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.loadRhs(&blB[13*RhsProgress], B1);
@@ -695,8 +696,8 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.madd(A1,B3,C7,B3);
            traits.loadLhs(&blA[7*LhsProgress], A1);
            traits.loadRhs(&blB[15*RhsProgress], B3);
-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
            traits.madd(A0,B2,C2,T0);
@@ -714,32 +715,32 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsPacket A0, A1;
-            RhsPacket B_0;
+            RhsPacket B0;
            RhsPacket T0;

            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
-            traits.madd(A0,B_0,C0,T0);
-            traits.madd(A1,B_0,C4,B_0);
-            traits.loadRhs(&blB[1*RhsProgress], B_0);
-            traits.madd(A0,B_0,C1,T0);
-            traits.madd(A1,B_0,C5,B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.madd(A0,B0,C0,T0);
+            traits.madd(A1,B0,C4,B0);
+            traits.loadRhs(&blB[1*RhsProgress], B0);
+            traits.madd(A0,B0,C1,T0);
+            traits.madd(A1,B0,C5,B0);
          }
          else
          {
            LhsPacket A0, A1;
-            RhsPacket B_0, B1, B2, B3;
+            RhsPacket B0, B1, B2, B3;
            RhsPacket T0;

            traits.loadLhs(&blA[0*LhsProgress], A0);
            traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
            traits.loadRhs(&blB[1*RhsProgress], B1);

-            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A0,B0,C0,T0);
            traits.loadRhs(&blB[2*RhsProgress], B2);
-            traits.madd(A1,B_0,C4,B_0);
+            traits.madd(A1,B0,C4,B0);
            traits.loadRhs(&blB[3*RhsProgress], B3);
            traits.madd(A0,B1,C1,T0);
            traits.madd(A1,B1,C5,B1);
@@ -826,42 +827,42 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsPacket A0;
-            RhsPacket B_0, B1;
+            RhsPacket B0, B1;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
            traits.loadRhs(&blB[1*RhsProgress], B1);
-            traits.madd(A0,B_0,C0,B_0);
-            traits.loadRhs(&blB[2*RhsProgress], B_0);
+            traits.madd(A0,B0,C0,B0);
+            traits.loadRhs(&blB[2*RhsProgress], B0);
            traits.madd(A0,B1,C1,B1);
            traits.loadLhs(&blA[1*LhsProgress], A0);
            traits.loadRhs(&blB[3*RhsProgress], B1);
-            traits.madd(A0,B_0,C0,B_0);
-            traits.loadRhs(&blB[4*RhsProgress], B_0);
+            traits.madd(A0,B0,C0,B0);
+            traits.loadRhs(&blB[4*RhsProgress], B0);
            traits.madd(A0,B1,C1,B1);
            traits.loadLhs(&blA[2*LhsProgress], A0);
            traits.loadRhs(&blB[5*RhsProgress], B1);
-            traits.madd(A0,B_0,C0,B_0);
-            traits.loadRhs(&blB[6*RhsProgress], B_0);
+            traits.madd(A0,B0,C0,B0);
+            traits.loadRhs(&blB[6*RhsProgress], B0);
            traits.madd(A0,B1,C1,B1);
            traits.loadLhs(&blA[3*LhsProgress], A0);
            traits.loadRhs(&blB[7*RhsProgress], B1);
-            traits.madd(A0,B_0,C0,B_0);
+            traits.madd(A0,B0,C0,B0);
            traits.madd(A0,B1,C1,B1);
          }
          else
          {
            LhsPacket A0;
-            RhsPacket B_0, B1, B2, B3;
+            RhsPacket B0, B1, B2, B3;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
            traits.loadRhs(&blB[1*RhsProgress], B1);

-            traits.madd(A0,B_0,C0,B_0);
+            traits.madd(A0,B0,C0,B0);
            traits.loadRhs(&blB[2*RhsProgress], B2);
            traits.loadRhs(&blB[3*RhsProgress], B3);
-            traits.loadRhs(&blB[4*RhsProgress], B_0);
+            traits.loadRhs(&blB[4*RhsProgress], B0);
            traits.madd(A0,B1,C1,B1);
            traits.loadRhs(&blB[5*RhsProgress], B1);
            traits.madd(A0,B2,C2,B2);
@@ -869,8 +870,8 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.madd(A0,B3,C3,B3);
            traits.loadLhs(&blA[1*LhsProgress], A0);
            traits.loadRhs(&blB[7*RhsProgress], B3);
-            traits.madd(A0,B_0,C0,B_0);
-            traits.loadRhs(&blB[8*RhsProgress], B_0);
+            traits.madd(A0,B0,C0,B0);
+            traits.loadRhs(&blB[8*RhsProgress], B0);
            traits.madd(A0,B1,C1,B1);
            traits.loadRhs(&blB[9*RhsProgress], B1);
            traits.madd(A0,B2,C2,B2);
@@ -879,8 +880,8 @@ EIGEN_ASM_COMMENT("mybegin4");
            traits.loadLhs(&blA[2*LhsProgress], A0);
            traits.loadRhs(&blB[11*RhsProgress], B3);

-            traits.madd(A0,B_0,C0,B_0);
-            traits.loadRhs(&blB[12*RhsProgress], B_0);
+            traits.madd(A0,B0,C0,B0);
+            traits.loadRhs(&blB[12*RhsProgress], B0);
            traits.madd(A0,B1,C1,B1);
            traits.loadRhs(&blB[13*RhsProgress], B1);
            traits.madd(A0,B2,C2,B2);
@@ -889,7 +890,7 @@ EIGEN_ASM_COMMENT("mybegin4");

            traits.loadLhs(&blA[3*LhsProgress], A0);
            traits.loadRhs(&blB[15*RhsProgress], B3);
-            traits.madd(A0,B_0,C0,B_0);
+            traits.madd(A0,B0,C0,B0);
            traits.madd(A0,B1,C1,B1);
            traits.madd(A0,B2,C2,B2);
            traits.madd(A0,B3,C3,B3);
@@ -904,26 +905,26 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsPacket A0;
-            RhsPacket B_0, B1;
+            RhsPacket B0, B1;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
            traits.loadRhs(&blB[1*RhsProgress], B1);
-            traits.madd(A0,B_0,C0,B_0);
+            traits.madd(A0,B0,C0,B0);
            traits.madd(A0,B1,C1,B1);
          }
          else
          {
            LhsPacket A0;
-            RhsPacket B_0, B1, B2, B3;
+            RhsPacket B0, B1, B2, B3;

            traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.loadRhs(&blB[0*RhsProgress], B0);
            traits.loadRhs(&blB[1*RhsProgress], B1);
            traits.loadRhs(&blB[2*RhsProgress], B2);
            traits.loadRhs(&blB[3*RhsProgress], B3);

-            traits.madd(A0,B_0,C0,B_0);
+            traits.madd(A0,B0,C0,B0);
            traits.madd(A0,B1,C1,B1);
            traits.madd(A0,B2,C2,B2);
            traits.madd(A0,B3,C3,B3);
@@ -970,26 +971,26 @@ EIGEN_ASM_COMMENT("mybegin4");
          if(nr==2)
          {
            LhsScalar A0;
-            RhsScalar B_0, B1;
+            RhsScalar B0, B1;

            A0 = blA[k];
-            B_0 = blB[0];
+            B0 = blB[0];
            B1 = blB[1];
-            MADD(cj,A0,B_0,C0,B_0);
+            MADD(cj,A0,B0,C0,B0);
            MADD(cj,A0,B1,C1,B1);
          }
          else
          {
            LhsScalar A0;
-            RhsScalar B_0, B1, B2, B3;
+            RhsScalar B0, B1, B2, B3;

            A0 = blA[k];
-            B_0 = blB[0];
+            B0 = blB[0];
            B1 = blB[1];
            B2 = blB[2];
            B3 = blB[3];

-            MADD(cj,A0,B_0,C0,B_0);
+            MADD(cj,A0,B0,C0,B0);
            MADD(cj,A0,B1,C1,B1);
            MADD(cj,A0,B2,C2,B2);
            MADD(cj,A0,B3,C3,B3);
@@ -1026,14 +1027,14 @@ EIGEN_ASM_COMMENT("mybegin4");
        for(Index k=0; k<depth; k++)
        {
          LhsPacket A0, A1;
-          RhsPacket B_0;
+          RhsPacket B0;
          RhsPacket T0;

          traits.loadLhs(&blA[0*LhsProgress], A0);
          traits.loadLhs(&blA[1*LhsProgress], A1);
-          traits.loadRhs(&blB[0*RhsProgress], B_0);
-          traits.madd(A0,B_0,C0,T0);
-          traits.madd(A1,B_0,C4,B_0);
+          traits.loadRhs(&blB[0*RhsProgress], B0);
+          traits.madd(A0,B0,C0,T0);
+          traits.madd(A1,B0,C4,B0);

          blB += RhsProgress;
          blA += 2*LhsProgress;
@@ -1065,10 +1066,10 @@ EIGEN_ASM_COMMENT("mybegin4");
        for(Index k=0; k<depth; k++)
        {
          LhsPacket A0;
-          RhsPacket B_0;
+          RhsPacket B0;
          traits.loadLhs(blA, A0);
-          traits.loadRhs(blB, B_0);
-          traits.madd(A0, B_0, C0, B_0);
+          traits.loadRhs(blB, B0);
+          traits.madd(A0, B0, C0, B0);
          blB += RhsProgress;
          blA += LhsProgress;
        }
@@ -1090,8 +1091,8 @@ EIGEN_ASM_COMMENT("mybegin4");
        for(Index k=0; k<depth; k++)
        {
          LhsScalar A0 = blA[k];
-          RhsScalar B_0 = blB[k];
-          MADD(cj, A0, B_0, C0, B_0);
+          RhsScalar B0 = blB[k];
+          MADD(cj, A0, B0, C0, B0);
        }
        res[(j2+0)*resStride + i] += alpha*C0;
      }
@@ -1102,7 +1103,7 @@ EIGEN_ASM_COMMENT("mybegin4");
 #undef CJMADD

 // pack a block of the lhs
-// The traversal is as follow (mr==4):
+// The travesal is as follow (mr==4):
 //   0  4  8 12 ...
 //   1  5  9 13 ...
 //   2  6 10 14 ...
@@ -1118,15 +1119,11 @@ EIGEN_ASM_COMMENT("mybegin4");
 template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
 struct gemm_pack_lhs
 {
-  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
+  void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
                  Index stride=0, Index offset=0)
  {
-    typedef typename packet_traits<Scalar>::type Packet;
-    enum { PacketSize = packet_traits<Scalar>::size };
-
-    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+//     enum { PacketSize = packet_traits<Scalar>::size };
    eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
-    eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
    conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
    const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
@@ -1134,44 +1131,9 @@ struct gemm_pack_lhs
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      if(PanelMode) count += Pack1 * offset;
-
-      if(StorageOrder==ColMajor)
-      {
-        for(Index k=0; k<depth; k++)
-        {
-          Packet A, B, C, D;
-          if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
-          if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
-          if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
-          if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
-          if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
-          if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
-          if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
-          if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
-        }
-      }
-      else
-      {
-        for(Index k=0; k<depth; k++)
-        {
-          // TODO add a vectorized transpose here
-          Index w=0;
-          for(; w<Pack1-3; w+=4)
-          {
-            Scalar a(cj(lhs(i+w+0, k))),
-                   b(cj(lhs(i+w+1, k))),
-                   c(cj(lhs(i+w+2, k))),
-                   d(cj(lhs(i+w+3, k)));
-            blockA[count++] = a;
-            blockA[count++] = b;
-            blockA[count++] = c;
-            blockA[count++] = d;
-          }
-          if(Pack1%4)
-            for(;w<Pack1;++w)
-              blockA[count++] = cj(lhs(i+w, k));
-        }
-      }
+      for(Index k=0; k<depth; k++)
+        for(Index w=0; w<Pack1; w++)
+          blockA[count++] = cj(lhs(i+w, k));
      if(PanelMode) count += Pack1 * (stride-offset-depth);
    }
    if(rows-peeled_mc>=Pack2)
@@ -1205,10 +1167,9 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
 {
  typedef typename packet_traits<Scalar>::type Packet;
  enum { PacketSize = packet_traits<Scalar>::size };
-  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
+  void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
                  Index stride=0, Index offset=0)
  {
-    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
    eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
    conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
    Index packet_cols = (cols/nr) * nr;
@@ -1253,10 +1214,9 @@ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode
 struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
 {
  enum { PacketSize = packet_traits<Scalar>::size };
-  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
+  void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
                  Index stride=0, Index offset=0)
  {
-    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
    eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
    conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
    Index packet_cols = (cols/nr) * nr;
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -78,7 +78,7 @@ static void run(Index rows, Index cols, Index depth,
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;

  Index kc = blocking.kc();                 // cache block size along the K direction
-  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+  Index mc = std::min(rows,blocking.mc());  // cache block size along the M direction
  //Index nc = blocking.nc(); // cache block size along the N direction

  gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -94,16 +94,15 @@ static void run(Index rows, Index cols, Index depth,
    
    std::size_t sizeA = kc*mc;
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
-    ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
-    
+    LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, sizeA);
+    RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
    RhsScalar* blockB = blocking.blockB();
    eigen_internal_assert(blockB!=0);

    // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
    for(Index k=0; k<depth; k+=kc)
    {
-      const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
+      const Index actual_kc = std::min(k+kc,depth)-k; // => rows of B', and cols of the A'

      // In order to reduce the chance that a thread has to wait for the other,
      // let's start by packing A'.
@@ -140,7 +139,7 @@ static void run(Index rows, Index cols, Index depth,
      // Then keep going as usual with the remaining A'
      for(Index i=mc; i<rows; i+=mc)
      {
-        const Index actual_mc = (std::min)(i+mc,rows)-i;
+        const Index actual_mc = std::min(i+mc,rows)-i;

        // pack A_i,k to A'
        pack_lhs(blockA, &lhs(i,k), lhsStride, actual_kc, actual_mc);
@@ -155,6 +154,9 @@ static void run(Index rows, Index cols, Index depth,
        #pragma omp atomic
        --(info[j].users);
    }
+
+    ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
+    ei_aligned_stack_delete(RhsScalar, w, sizeW);
  }
  else
 #endif // EIGEN_HAS_OPENMP
@@ -165,16 +167,15 @@ static void run(Index rows, Index cols, Index depth,
    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*cols;
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-
-    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
-    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
-    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
+    LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
+    RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
+    RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();

    // For each horizontal panel of the rhs, and corresponding panel of the lhs...
    // (==GEMM_VAR1)
    for(Index k2=0; k2<depth; k2+=kc)
    {
-      const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+      const Index actual_kc = std::min(k2+kc,depth)-k2;

      // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
      // => Pack rhs's panel into a sequential chunk of memory (L2 caching)
@@ -187,7 +188,7 @@ static void run(Index rows, Index cols, Index depth,
      // (==GEPP_VAR1)
      for(Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+        const Index actual_mc = std::min(i2+mc,rows)-i2;

        // We pack the lhs's block into a sequential chunk of memory (L1 caching)
        // Note that this block will be read a very high number of times, which is equal to the number of
@@ -199,6 +200,10 @@ static void run(Index rows, Index cols, Index depth,

      }
    }
+
+    if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, sizeA);
+    if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
+    if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
  }
 }

@@ -412,8 +417,8 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
    {
      eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

-      typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-      typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+      const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
+      const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);

      Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                                 * RhsBlasTraits::extractScalarFactor(m_rhs);
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -42,14 +42,14 @@ struct tribb_kernel;
 template <typename Index,
          typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
-                              int ResStorageOrder, int  UpLo, int Version = Specialized>
+                              int ResStorageOrder, int  UpLo>
 struct general_matrix_matrix_triangular_product;

 // as usual if the result is row major => we transpose the product
 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
-                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
-struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
-{
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
+{  
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
                                      const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
@@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
 };

 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
-                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
-struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
 {
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
@@ -83,10 +83,10 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
    if(mc > Traits::nr)
      mc = (mc/Traits::nr)*Traits::nr;

+    LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*size;
-    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
+    RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB);
    RhsScalar* blockB = allocatedBlockB + sizeW;
    
    gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -96,14 +96,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,

    for(Index k2=0; k2<depth; k2+=kc)
    {
-      const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+      const Index actual_kc = std::min(k2+kc,depth)-k2;

      // note that the actual rhs is the transpose/adjoint of mat
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);

      for(Index i2=0; i2<size; i2+=mc)
      {
-        const Index actual_mc = (std::min)(i2+mc,size)-i2;
+        const Index actual_mc = std::min(i2+mc,size)-i2;

        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

@@ -112,7 +112,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
        //  2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
        //  3 - after the diagonal => processed with gebp or skipped
        if (UpLo==Lower)
-          gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
+          gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
               -1, -1, 0, 0, allocatedBlockB);

        sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
@@ -120,11 +120,13 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
        if (UpLo==Upper)
        {
          Index j2 = i2+actual_mc;
-          gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
+          gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
               -1, -1, 0, 0, allocatedBlockB);
        }
      }
    }
+    ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
+    ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB);
  }
 };

@@ -201,13 +203,13 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(
  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
  typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
-  typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+  const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
  
  typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
  typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
-  typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+  const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());

  typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
@@ -1,142 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   Level 3 BLAS SYRK/HERK implementation.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
-#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
-
-namespace internal {
-
-template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int  UpLo>
-struct general_matrix_matrix_rankupdate :
-       general_matrix_matrix_triangular_product<
-         Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
-
-
-// try to go to BLAS specialization
-#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
-template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
-                          int RhsStorageOrder, bool ConjugateRhs, int  UpLo> \
-struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
-               Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
-  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
-                          const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
-  { \
-    if (lhs==rhs) { \
-      general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
-      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
-    } else { \
-      general_matrix_matrix_triangular_product<Index, \
-        Scalar, LhsStorageOrder, ConjugateLhs, \
-        Scalar, RhsStorageOrder, ConjugateRhs, \
-        ColMajor, UpLo, BuiltIn> \
-      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
-    } \
-  } \
-};
-
-EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
-//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
-EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
-//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
-
-// SYRK for float/double
-#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
-template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
-struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
-  enum { \
-    IsLower = (UpLo&Lower) == Lower, \
-    LowUp = IsLower ? Lower : Upper, \
-    conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
-  }; \
-  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
-                          const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
-  { \
-  /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
-\
-   MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
-   char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
-   MKLTYPE alpha_, beta_; \
-\
-/* Set alpha_ & beta_ */ \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
-   MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
-  } \
-};
-
-// HERK for complex data
-#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
-template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
-struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
-  enum { \
-    IsLower = (UpLo&Lower) == Lower, \
-    LowUp = IsLower ? Lower : Upper, \
-    conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
-  }; \
-  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
-                          const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
-  { \
-   typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
-\
-   MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
-   char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
-   RTYPE alpha_, beta_; \
-   const EIGTYPE* a_ptr; \
-\
-/* Set alpha_ & beta_ */ \
-/*   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
-/*   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
-   alpha_ = alpha.real(); \
-   beta_ = 1.0; \
-/* Copy with conjugation in some cases*/ \
-   MatrixType a; \
-   if (conjA) { \
-     Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
-     a = mapA.conjugate(); \
-     lda = a.outerStride(); \
-     a_ptr = a.data(); \
-   } else a_ptr=lhs; \
-   MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
-  } \
-};
-
-
-EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
-EIGEN_MKL_RANKUPDATE_R(float,  float,  ssyrk)
-
-//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
-//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8,  double, cherk)
-
-
-} // end namespace internal
-
-#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
--- a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
@@ -1,114 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   General matrix-matrix product functionality based on ?GEMM.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
-#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
-
-namespace internal {
-
-/**********************************************************************
-* This file implements general matrix-matrix multiplication using BLAS
-* gemm function via partial specialization of
-* general_matrix_matrix_product::run(..) method for float, double,
-* std::complex<float> and std::complex<double> types
-**********************************************************************/
-
-// gemm specialization
-
-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
-template< \
-  typename Index, \
-  int LhsStorageOrder, bool ConjugateLhs, \
-  int RhsStorageOrder, bool ConjugateRhs> \
-struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
-{ \
-static void run(Index rows, Index cols, Index depth, \
-  const EIGTYPE* _lhs, Index lhsStride, \
-  const EIGTYPE* _rhs, Index rhsStride, \
-  EIGTYPE* res, Index resStride, \
-  EIGTYPE alpha, \
-  level3_blocking<EIGTYPE, EIGTYPE>& blocking, \
-  GemmParallelInfo<Index>* info = 0) \
-{ \
-  using std::conj; \
-\
-  char transa, transb; \
-  MKL_INT m, n, k, lda, ldb, ldc; \
-  const EIGTYPE *a, *b; \
-  MKLTYPE alpha_, beta_; \
-  MatrixX##EIGPREFIX a_tmp, b_tmp; \
-  EIGTYPE myone(1);\
-\
-/* Set transpose options */ \
-  transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
-  transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
-\
-/* Set m, n, k */ \
-  m = (MKL_INT)rows;  \
-  n = (MKL_INT)cols;  \
-  k = (MKL_INT)depth; \
-\
-/* Set alpha_ & beta_ */ \
-  assign_scalar_eig2mkl(alpha_, alpha); \
-  assign_scalar_eig2mkl(beta_, myone); \
-\
-/* Set lda, ldb, ldc */ \
-  lda = (MKL_INT)lhsStride; \
-  ldb = (MKL_INT)rhsStride; \
-  ldc = (MKL_INT)resStride; \
-\
-/* Set a, b, c */ \
-  if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
-    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
-    a_tmp = lhs.conjugate(); \
-    a = a_tmp.data(); \
-    lda = a_tmp.outerStride(); \
-  } else a = _lhs; \
-\
-  if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
-    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
-    b_tmp = rhs.conjugate(); \
-    b = b_tmp.data(); \
-    ldb = b_tmp.outerStride(); \
-  } else b = _rhs; \
-\
-  MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
-}};
-
-GEMM_SPECIALIZATION(double,   d,  double,        d)
-GEMM_SPECIALIZATION(float,    f,  float,         s)
-GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
-GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  c)
-
-} //end of namespase
-
-#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -40,8 +40,8 @@ namespace internal {
 *  |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
 *  |cplx |real |real | optimal case, vectorization possible via real-cplx mul
 */
-template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
-struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
+struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
 {
 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;

@@ -99,7 +99,7 @@ EIGEN_DONT_INLINE static void run(
  
  // How many coeffs of the result do we have to skip to be aligned.
  // Here we assume data are at least aligned on the base scalar type.
-  Index alignedStart = internal::first_aligned(res,size);
+  Index alignedStart = first_aligned(res,size);
  Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
  const Index peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;

@@ -109,7 +109,7 @@ EIGEN_DONT_INLINE static void run(
                       : FirstAligned;

  // we cannot assume the first element is aligned because of sub-matrices
-  const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
+  const Index lhsAlignmentOffset = first_aligned(lhs,size);

  // find how many columns do we have to skip to be aligned with the result (if possible)
  Index skipColumns = 0;
@@ -134,7 +134,7 @@ EIGEN_DONT_INLINE static void run(
    }
    else
    {
-      skipColumns = (std::min)(skipColumns,cols);
+      skipColumns = std::min(skipColumns,cols);
      // note that the skiped columns are processed later.
    }

@@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run(
 *  - alpha is always a complex (or converted to a complex)
 *  - no vectorization
 */
-template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
-struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
+struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
 {
 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;

@@ -351,7 +351,7 @@ EIGEN_DONT_INLINE static void run(
  // How many coeffs of the result do we have to skip to be aligned.
  // Here we assume data are at least aligned on the base scalar type
  // if that's not the case then vectorization is discarded, see below.
-  Index alignedStart = internal::first_aligned(rhs, depth);
+  Index alignedStart = first_aligned(rhs, depth);
  Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
  const Index peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;

@@ -361,7 +361,7 @@ EIGEN_DONT_INLINE static void run(
                         : FirstAligned;

  // we cannot assume the first element is aligned because of sub-matrices
-  const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
+  const Index lhsAlignmentOffset = first_aligned(lhs,depth);

  // find how many rows do we have to skip to be aligned with rhs (if possible)
  Index skipRows = 0;
@@ -386,7 +386,7 @@ EIGEN_DONT_INLINE static void run(
    }
    else
    {
-      skipRows = (std::min)(skipRows,Index(rows));
+      skipRows = std::min(skipRows,Index(rows));
      // note that the skiped columns are processed later.
    }
    eigen_internal_assert(  alignmentPattern==NoneAligned
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -1,127 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   General matrix-vector product functionality based on ?GEMV.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
-#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
-
-namespace internal {
-
-/**********************************************************************
-* This file implements general matrix-vector multiplication using BLAS
-* gemv function via partial specialization of
-* general_matrix_vector_product::run(..) method for float, double,
-* std::complex<float> and std::complex<double> types
-**********************************************************************/
-
-// gemv specialization
-
-template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct general_matrix_vector_product_gemv :
-  general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
-
-#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
-template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
-struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
-  Index rows, Index cols, \
-  const Scalar* lhs, Index lhsStride, \
-  const Scalar* rhs, Index rhsIncr, \
-  Scalar* res, Index resIncr, Scalar alpha) \
-{ \
-  if (ConjugateLhs) { \
-    general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
-      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
-  } else { \
-    general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
-      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
-  } \
-} \
-}; \
-template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
-struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
-  Index rows, Index cols, \
-  const Scalar* lhs, Index lhsStride, \
-  const Scalar* rhs, Index rhsIncr, \
-  Scalar* res, Index resIncr, Scalar alpha) \
-{ \
-    general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
-      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
-} \
-}; \
-
-EIGEN_MKL_GEMV_SPECIALIZE(double)
-EIGEN_MKL_GEMV_SPECIALIZE(float)
-EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
-
-#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
-template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
-struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
-{ \
-typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
-\
-static EIGEN_DONT_INLINE void run( \
-  Index rows, Index cols, \
-  const EIGTYPE* lhs, Index lhsStride, \
-  const EIGTYPE* rhs, Index rhsIncr, \
-  EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
-{ \
-  MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
-  MKLTYPE alpha_, beta_; \
-  const EIGTYPE *x_ptr, myone(1); \
-  char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
-  if (LhsStorageOrder==RowMajor) { \
-    m=cols; \
-    n=rows; \
-  }\
-  assign_scalar_eig2mkl(alpha_, alpha); \
-  assign_scalar_eig2mkl(beta_, myone); \
-  GEMVVector x_tmp; \
-  if (ConjugateRhs) { \
-    Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
-    x_tmp=map_x.conjugate(); \
-    x_ptr=x_tmp.data(); \
-    incx=1; \
-  } else x_ptr=rhs; \
-  MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
-}\
-};
-
-EIGEN_MKL_GEMV_SPECIALIZATION(double,   double,        d)
-EIGEN_MKL_GEMV_SPECIALIZATION(float,    float,         s)
-EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8,  c)
-
-} //end of namespase
-
-#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -85,9 +85,7 @@ template<typename Index> struct GemmParallelInfo
 template<bool Condition, typename Functor, typename Index>
 void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
 {
-  // TODO when EIGEN_USE_BLAS is defined,
-  // we should still enable OMP for other scalar types
-#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
+#ifndef EIGEN_HAS_OPENMP
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -114,7 +114,7 @@ struct symm_pack_rhs
    }

    // second part: diagonal block
-    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
+    for(Index j2=k2; j2<std::min(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically in three different parts (transpose, symmetric, normal)
      // transpose
@@ -179,7 +179,7 @@ struct symm_pack_rhs
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transpose
-      Index half = (std::min)(end_k,j2);
+      Index half = std::min(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = conj(rhs(j2,k));
@@ -261,12 +261,12 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must smaller than mc
-    kc = (std::min)(kc,mc);
+    kc = std::min(kc,mc);

+    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -276,7 +276,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs

    for(Index k2=0; k2<size; k2+=kc)
    {
-      const Index actual_kc = (std::min)(k2+kc,size)-k2;
+      const Index actual_kc = std::min(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
@@ -289,7 +289,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
-        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
+        const Index actual_mc = std::min(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

@@ -297,7 +297,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
      }
      // the block diagonal
      {
-        const Index actual_mc = (std::min)(k2+kc,size)-k2;
+        const Index actual_mc = std::min(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

@@ -306,13 +306,16 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs

      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
-        const Index actual_mc = (std::min)(i2+mc,size)-i2;
+        const Index actual_mc = std::min(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
+
+    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

@@ -340,10 +343,11 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
+
+    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -352,19 +356,22 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh

    for(Index k2=0; k2<size; k2+=kc)
    {
-      const Index actual_kc = (std::min)(k2+kc,size)-k2;
+      const Index actual_kc = std::min(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+        const Index actual_mc = std::min(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
+
+    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

@@ -400,8 +407,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

-    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+    const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
+    const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -1,291 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
-#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
-
-namespace internal {
-
-
-/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
-
-#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template <typename Index, \
-          int LhsStorageOrder, bool ConjugateLhs, \
-          int RhsStorageOrder, bool ConjugateRhs> \
-struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
-{\
-\
-  static EIGEN_DONT_INLINE void run( \
-    Index rows, Index cols, \
-    const EIGTYPE* _lhs, Index lhsStride, \
-    const EIGTYPE* _rhs, Index rhsStride, \
-    EIGTYPE* res,        Index resStride, \
-    EIGTYPE alpha) \
-  { \
-    char side='L', uplo='L'; \
-    MKL_INT m, n, lda, ldb, ldc; \
-    const EIGTYPE *a, *b; \
-    MKLTYPE alpha_, beta_; \
-    MatrixX##EIGPREFIX b_tmp; \
-    EIGTYPE myone(1);\
-\
-/* Set transpose options */ \
-/* Set m, n, k */ \
-    m = (MKL_INT)rows;  \
-    n = (MKL_INT)cols;  \
-\
-/* Set alpha_ & beta_ */ \
-    assign_scalar_eig2mkl(alpha_, alpha); \
-    assign_scalar_eig2mkl(beta_, myone); \
-\
-/* Set lda, ldb, ldc */ \
-    lda = (MKL_INT)lhsStride; \
-    ldb = (MKL_INT)rhsStride; \
-    ldc = (MKL_INT)resStride; \
-\
-/* Set a, b, c */ \
-    if (LhsStorageOrder==RowMajor) uplo='U'; \
-    a = _lhs; \
-\
-    if (RhsStorageOrder==RowMajor) { \
-      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
-      b_tmp = rhs.adjoint(); \
-      b = b_tmp.data(); \
-      ldb = b_tmp.outerStride(); \
-    } else b = _rhs; \
-\
-    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
-\
-  } \
-};
-
-
-#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template <typename Index, \
-          int LhsStorageOrder, bool ConjugateLhs, \
-          int RhsStorageOrder, bool ConjugateRhs> \
-struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
-{\
-  static EIGEN_DONT_INLINE void run( \
-    Index rows, Index cols, \
-    const EIGTYPE* _lhs, Index lhsStride, \
-    const EIGTYPE* _rhs, Index rhsStride, \
-    EIGTYPE* res,        Index resStride, \
-    EIGTYPE alpha) \
-  { \
-    char side='L', uplo='L'; \
-    MKL_INT m, n, lda, ldb, ldc; \
-    const EIGTYPE *a, *b; \
-    MKLTYPE alpha_, beta_; \
-    MatrixX##EIGPREFIX b_tmp; \
-    Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
-    EIGTYPE myone(1); \
-\
-/* Set transpose options */ \
-/* Set m, n, k */ \
-    m = (MKL_INT)rows; \
-    n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
-    assign_scalar_eig2mkl(alpha_, alpha); \
-    assign_scalar_eig2mkl(beta_, myone); \
-\
-/* Set lda, ldb, ldc */ \
-    lda = (MKL_INT)lhsStride; \
-    ldb = (MKL_INT)rhsStride; \
-    ldc = (MKL_INT)resStride; \
-\
-/* Set a, b, c */ \
-    if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
-      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
-      a_tmp = lhs.conjugate(); \
-      a = a_tmp.data(); \
-      lda = a_tmp.outerStride(); \
-    } else a = _lhs; \
-    if (LhsStorageOrder==RowMajor) uplo='U'; \
-\
-    if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
-       b = _rhs; } \
-    else { \
-      if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
-        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
-        b_tmp = rhs.conjugate(); \
-      } else \
-      if (ConjugateRhs) { \
-        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
-        b_tmp = rhs.adjoint(); \
-      } else { \
-        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
-        b_tmp = rhs.transpose(); \
-      } \
-      b = b_tmp.data(); \
-      ldb = b_tmp.outerStride(); \
-    } \
-\
-    MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
-\
-  } \
-};
-
-EIGEN_MKL_SYMM_L(double, double, d, d)
-EIGEN_MKL_SYMM_L(float, float, f, s)
-EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
-
-
-/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
-
-#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template <typename Index, \
-          int LhsStorageOrder, bool ConjugateLhs, \
-          int RhsStorageOrder, bool ConjugateRhs> \
-struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
-{\
-\
-  static EIGEN_DONT_INLINE void run( \
-    Index rows, Index cols, \
-    const EIGTYPE* _lhs, Index lhsStride, \
-    const EIGTYPE* _rhs, Index rhsStride, \
-    EIGTYPE* res,        Index resStride, \
-    EIGTYPE alpha) \
-  { \
-    char side='R', uplo='L'; \
-    MKL_INT m, n, lda, ldb, ldc; \
-    const EIGTYPE *a, *b; \
-    MKLTYPE alpha_, beta_; \
-    MatrixX##EIGPREFIX b_tmp; \
-    EIGTYPE myone(1);\
-\
-/* Set m, n, k */ \
-    m = (MKL_INT)rows;  \
-    n = (MKL_INT)cols;  \
-\
-/* Set alpha_ & beta_ */ \
-    assign_scalar_eig2mkl(alpha_, alpha); \
-    assign_scalar_eig2mkl(beta_, myone); \
-\
-/* Set lda, ldb, ldc */ \
-    lda = (MKL_INT)rhsStride; \
-    ldb = (MKL_INT)lhsStride; \
-    ldc = (MKL_INT)resStride; \
-\
-/* Set a, b, c */ \
-    if (RhsStorageOrder==RowMajor) uplo='U'; \
-    a = _rhs; \
-\
-    if (LhsStorageOrder==RowMajor) { \
-      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
-      b_tmp = lhs.adjoint(); \
-      b = b_tmp.data(); \
-      ldb = b_tmp.outerStride(); \
-    } else b = _lhs; \
-\
-    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
-\
-  } \
-};
-
-
-#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template <typename Index, \
-          int LhsStorageOrder, bool ConjugateLhs, \
-          int RhsStorageOrder, bool ConjugateRhs> \
-struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
-{\
-  static EIGEN_DONT_INLINE void run( \
-    Index rows, Index cols, \
-    const EIGTYPE* _lhs, Index lhsStride, \
-    const EIGTYPE* _rhs, Index rhsStride, \
-    EIGTYPE* res,        Index resStride, \
-    EIGTYPE alpha) \
-  { \
-    char side='R', uplo='L'; \
-    MKL_INT m, n, lda, ldb, ldc; \
-    const EIGTYPE *a, *b; \
-    MKLTYPE alpha_, beta_; \
-    MatrixX##EIGPREFIX b_tmp; \
-    Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
-    EIGTYPE myone(1); \
-\
-/* Set m, n, k */ \
-    m = (MKL_INT)rows; \
-    n = (MKL_INT)cols; \
-\
-/* Set alpha_ & beta_ */ \
-    assign_scalar_eig2mkl(alpha_, alpha); \
-    assign_scalar_eig2mkl(beta_, myone); \
-\
-/* Set lda, ldb, ldc */ \
-    lda = (MKL_INT)rhsStride; \
-    ldb = (MKL_INT)lhsStride; \
-    ldc = (MKL_INT)resStride; \
-\
-/* Set a, b, c */ \
-    if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
-      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
-      a_tmp = rhs.conjugate(); \
-      a = a_tmp.data(); \
-      lda = a_tmp.outerStride(); \
-    } else a = _rhs; \
-    if (RhsStorageOrder==RowMajor) uplo='U'; \
-\
-    if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
-       b = _lhs; } \
-    else { \
-      if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
-        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
-        b_tmp = lhs.conjugate(); \
-      } else \
-      if (ConjugateLhs) { \
-        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
-        b_tmp = lhs.adjoint(); \
-      } else { \
-        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
-        b_tmp = lhs.transpose(); \
-      } \
-      b = b_tmp.data(); \
-      ldb = b_tmp.outerStride(); \
-    } \
-\
-    MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
-  } \
-};
-
-EIGEN_MKL_SYMM_R(double, double, d, d)
-EIGEN_MKL_SYMM_R(float, float, f, s)
-EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
-
-} // end namespace internal
-
-#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -32,15 +32,8 @@ namespace internal {
 * the number of load/stores of the result by a factor 2 and to reduce
 * the instruction dependency.
 */
-
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
-struct selfadjoint_matrix_vector_product;
-
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
-struct selfadjoint_matrix_vector_product
-
-{
-static EIGEN_DONT_INLINE void run(
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
+static EIGEN_DONT_INLINE void product_selfadjoint_vector(
  Index size,
  const Scalar*  lhs, Index lhsStride,
  const Scalar* _rhs, Index rhsIncr,
@@ -69,15 +62,17 @@ static EIGEN_DONT_INLINE void run(
  // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
  // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
  // this is because we need to extract packets
-  ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);  
+  const Scalar* EIGEN_RESTRICT rhs = _rhs;
  if (rhsIncr!=1)
  {
+    Scalar* r = ei_aligned_stack_new(Scalar, size);
    const Scalar* it = _rhs;
    for (Index i=0; i<size; ++i, it+=rhsIncr)
-      rhs[i] = *it;
+      r[i] = *it;
+    rhs = r;
  }

-  Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
+  Index bound = std::max(Index(0),size-8) & 0xfffffffe;
  if (FirstTriangular)
    bound = size - bound;

@@ -92,14 +87,14 @@ static EIGEN_DONT_INLINE void run(
    Scalar t1 = cjAlpha * rhs[j+1];
    Packet ptmp1 = pset1<Packet>(t1);

-    Scalar t2(0);
+    Scalar t2 = 0;
    Packet ptmp2 = pset1<Packet>(t2);
-    Scalar t3(0);
+    Scalar t3 = 0;
    Packet ptmp3 = pset1<Packet>(t3);

    size_t starti = FirstTriangular ? 0 : j+2;
    size_t endi   = FirstTriangular ? j : size;
-    size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
+    size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
    size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);

    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
@@ -155,7 +150,7 @@ static EIGEN_DONT_INLINE void run(
    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;

    Scalar t1 = cjAlpha * rhs[j];
-    Scalar t2(0);
+    Scalar t2 = 0;
    // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
    res[j] += cjd.pmul(internal::real(A0[j]), t1);
    for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
@@ -165,8 +160,10 @@ static EIGEN_DONT_INLINE void run(
    }
    res[j] += alpha * t2;
  }
+
+  if(rhsIncr!=1)
+    ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
 }
-};

 } // end namespace internal 

@@ -201,8 +198,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
    
    eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());

-    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+    const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
+    const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);
@@ -214,33 +211,45 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
    
    internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
    internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
-
-    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
-                                                  EvalToDest ? dest.data() : static_dest.data());
-                                                  
-    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
-        UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
    
-    if(!EvalToDest)
+    bool freeDestPtr = false;
+    ResScalar* actualDestPtr;
+    if(EvalToDest)
+      actualDestPtr = dest.data();
+    else
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
+      if((actualDestPtr=static_dest.data())==0)
+      {
+        freeDestPtr = true;
+        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
+      }
      MappedDest(actualDestPtr, dest.size()) = dest;
    }
      
-    if(!UseRhs)
+    bool freeRhsPtr = false;
+    RhsScalar* actualRhsPtr;
+    if(UseRhs)
+      actualRhsPtr = const_cast<RhsScalar*>(rhs.data());
+    else
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = rhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
+      if((actualRhsPtr=static_rhs.data())==0)
+      {
+        freeRhsPtr = true;
+        actualRhsPtr = ei_aligned_stack_new(RhsScalar,rhs.size());
+      }
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
    }
      
      
-    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
+    internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
      (
        lhs.rows(),                             // size
        &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
@@ -250,7 +259,11 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
      );
    
    if(!EvalToDest)
+    {
      dest = MappedDest(actualDestPtr, dest.size());
+      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
+    }
+    if(freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, rhs.size());
  }
 };

--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -1,110 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
-#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
-
-namespace internal {
-
-/**********************************************************************
-* This file implements selfadjoint matrix-vector multiplication using BLAS
-**********************************************************************/
-
-// symv/hemv specialization
-
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
-struct selfadjoint_matrix_vector_product_symv :
-  selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
-
-#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
-template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
-struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
-  Index size, const Scalar*  lhs, Index lhsStride, \
-  const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
-    enum {\
-      IsColMajor = StorageOrder==ColMajor \
-    }; \
-    if (IsColMajor == ConjugateLhs) {\
-      selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
-        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
-    } else {\
-      selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
-        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
-    }\
-  } \
-}; \
-
-EIGEN_MKL_SYMV_SPECIALIZE(double)
-EIGEN_MKL_SYMV_SPECIALIZE(float)
-EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
-
-#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
-template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
-struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
-{ \
-typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
-\
-static EIGEN_DONT_INLINE void run( \
-Index size, const EIGTYPE*  lhs, Index lhsStride, \
-const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
-{ \
-  enum {\
-    IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
-    IsLower = UpLo == Lower ? 1 : 0, \
-  }; \
-  MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
-  MKLTYPE alpha_, beta_; \
-  const EIGTYPE *x_ptr, myone(1); \
-  char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
-  assign_scalar_eig2mkl(alpha_, alpha); \
-  assign_scalar_eig2mkl(beta_, myone); \
-  SYMVVector x_tmp; \
-  if (ConjugateRhs) { \
-    Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
-    x_tmp=map_x.conjugate(); \
-    x_ptr=x_tmp.data(); \
-    incx=1; \
-  } else x_ptr=_rhs; \
-  MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
-}\
-};
-
-EIGEN_MKL_SYMV_SPECIALIZATION(double,   double,        dsymv)
-EIGEN_MKL_SYMV_SPECIALIZATION(float,    float,         ssymv)
-EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
-EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)
-
-} //end of namespase
-
-#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -72,7 +72,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
    typedef internal::blas_traits<OtherType> OtherBlasTraits;
    typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
-    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
+    const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());

    Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());

@@ -81,17 +81,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
      UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
    };
    internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
-
-    ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
-      (UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
-      
-    if(!UseOtherDirectly)
+    
+    bool freeOtherPtr = false;
+    Scalar* actualOtherPtr;
+    if(UseOtherDirectly)
+      actualOtherPtr = const_cast<Scalar*>(actualOther.data());
+    else
+    {
+      if((actualOtherPtr=static_other.data())==0)
+      {
+        freeOtherPtr = true;
+        actualOtherPtr = ei_aligned_stack_new(Scalar,other.size());
+      }
      Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
+    }
    
    selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
                              OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
                            (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
          ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
+    
+    if((!UseOtherDirectly) && freeOtherPtr) ei_aligned_stack_delete(Scalar, actualOtherPtr, other.size());
  }
 };

@@ -105,12 +115,12 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
    typedef internal::blas_traits<OtherType> OtherBlasTraits;
    typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
-    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
+    const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());

    Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());

    enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
-
+    
    internal::general_matrix_matrix_triangular_product<Index,
      Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor,   OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
      Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -76,12 +76,12 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
  typedef internal::blas_traits<DerivedU> UBlasTraits;
  typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
  typedef typename internal::remove_all<ActualUType>::type _ActualUType;
-  typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
+  const ActualUType actualU = UBlasTraits::extract(u.derived());

  typedef internal::blas_traits<DerivedV> VBlasTraits;
  typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
  typedef typename internal::remove_all<ActualVType>::type _ActualVType;
-  typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
+  const ActualVType actualV = VBlasTraits::extract(v.derived());

  // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
  // vice versa, and take the complex conjugate of all coefficients and vector entries.
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -58,16 +58,16 @@ template <typename Scalar, typename Index,
          int Mode, bool LhsIsTriangular,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
-          int ResStorageOrder, int Version = Specialized>
+          int ResStorageOrder>
 struct product_triangular_matrix_matrix;

 template <typename Scalar, typename Index,
          int Mode, bool LhsIsTriangular,
          int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs, int Version>
+          int RhsStorageOrder, bool ConjugateRhs>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
                                           LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,RowMajor,Version>
+                                           RhsStorageOrder,ConjugateRhs,RowMajor>
 {
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols, Index depth,
@@ -91,43 +91,38 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
 // implements col-major += alpha * op(triangular) * op(general)
 template <typename Scalar, typename Index, int Mode,
          int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs, int Version>
+          int RhsStorageOrder, bool ConjugateRhs>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
                                           LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+                                           RhsStorageOrder,ConjugateRhs,ColMajor>
 {
-  
-  typedef gebp_traits<Scalar,Scalar> Traits;
-  enum {
-    SmallPanelWidth   = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
-    IsLower = (Mode&Lower) == Lower,
-    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
-  };

  static EIGEN_DONT_INLINE void run(
-    Index _rows, Index _cols, Index _depth,
+    Index rows, Index cols, Index depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    Scalar alpha)
  {
-    // strip zeros
-    Index diagSize  = (std::min)(_rows,_depth);
-    Index rows      = IsLower ? _rows : diagSize;
-    Index depth     = IsLower ? diagSize : _depth;
-    Index cols      = _cols;
-    
    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

+    typedef gebp_traits<Scalar,Scalar> Traits;
+    enum {
+      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+      IsLower = (Mode&Lower) == Lower,
+      SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+    };
+
    Index kc = depth; // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+
+    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);    
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
    Scalar* blockB = allocatedBlockB + sizeW;

    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
@@ -145,7 +140,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
        IsLower ? k2>0 : k2<depth;
        IsLower ? k2-=kc : k2+=kc)
    {
-      Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
+      Index actual_kc = std::min(IsLower ? k2 : depth-k2, kc);
      Index actual_k2 = IsLower ? k2-actual_kc : k2;

      // align blocks with the end of the triangular part for trapezoidal lhs
@@ -158,11 +153,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
      pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);

      // the selected lhs's panel has to be split in three different parts:
-      //  1 - the part which is zero => skip it
+      //  1 - the part which is above the diagonal block => skip it
      //  2 - the diagonal block => special kernel
-      //  3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
-
-      // the block diagonal, if any:
+      //  3 - the panel below the diagonal block => GEPP
+      // the block diagonal, if any
      if(IsLower || actual_k2<rows)
      {
        // for each small vertical panels of lhs
@@ -200,13 +194,13 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
          }
        }
      }
-      // the part below (lower case) or above (upper case) the diagonal => GEPP
+      // the part below the diagonal => GEPP
      {
        Index start = IsLower ? k2 : 0;
-        Index end   = IsLower ? rows : (std::min)(actual_k2,rows);
+        Index end   = IsLower ? rows : std::min(actual_k2,rows);
        for(Index i2=start; i2<end; i2+=mc)
        {
-          const Index actual_mc = (std::min)(i2+mc,end)-i2;
+          const Index actual_mc = std::min(i2+mc,end)-i2;
          gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
            (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);

@@ -214,49 +208,48 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
        }
      }
    }
+
+    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
+//     delete[] allocatedBlockB;
  }
 };

 // implements col-major += alpha * op(general) * op(triangular)
 template <typename Scalar, typename Index, int Mode,
          int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs, int Version>
+          int RhsStorageOrder, bool ConjugateRhs>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                                           LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+                                           RhsStorageOrder,ConjugateRhs,ColMajor>
 {
-  typedef gebp_traits<Scalar,Scalar> Traits;
-  enum {
-    SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
-    IsLower = (Mode&Lower) == Lower,
-    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
-  };

  static EIGEN_DONT_INLINE void run(
-    Index _rows, Index _cols, Index _depth,
+    Index rows, Index cols, Index depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    Scalar alpha)
  {
-    // strip zeros
-    Index diagSize  = (std::min)(_cols,_depth);
-    Index rows      = _rows;
-    Index depth     = IsLower ? _depth : diagSize;
-    Index cols      = IsLower ? diagSize : _cols;
-    
    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

+    typedef gebp_traits<Scalar,Scalar> Traits;
+    enum {
+      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+      IsLower = (Mode&Lower) == Lower,
+      SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+    };
+
    Index kc = depth; // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);

+    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB);
    Scalar* blockB = allocatedBlockB + sizeW;

    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
@@ -275,7 +268,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
        IsLower ? k2<depth  : k2>0;
        IsLower ? k2+=kc   : k2-=kc)
    {
-      Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
+      Index actual_kc = std::min(IsLower ? depth-k2 : k2, kc);
      Index actual_k2 = IsLower ? k2 : k2-actual_kc;

      // align blocks with the end of the triangular part for trapezoidal rhs
@@ -286,7 +279,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
      }

      // remaining size
-      Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
+      Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2;
      // size of the triangular part
      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;

@@ -327,7 +320,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,

      for (Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = (std::min)(mc,rows-i2);
+        const Index actual_mc = std::min(mc,rows-i2);
        pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);

        // triangular kernel
@@ -354,6 +347,9 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                    -1, -1, 0, 0, allocatedBlockB);
      }
    }
+
+    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

@@ -378,8 +374,8 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>

  template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
  {
-    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+    const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
+    const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -1,305 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   Triangular matrix * matrix product functionality based on ?TRMM.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
-#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
-
-namespace internal {
-
-
-template <typename Scalar, typename Index,
-          int Mode, bool LhsIsTriangular,
-          int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs,
-          int ResStorageOrder>
-struct product_triangular_matrix_matrix_trmm :
-       product_triangular_matrix_matrix<Scalar,Index,Mode,
-          LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
-          RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
-
-
-// try to go to BLAS specialization
-#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
-template <typename Index, int Mode, \
-          int LhsStorageOrder, bool ConjugateLhs, \
-          int RhsStorageOrder, bool ConjugateRhs> \
-struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
-           LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
-  static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
-    const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \
-      product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
-        LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
-        RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
-        _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
-  } \
-};
-
-EIGEN_MKL_TRMM_SPECIALIZE(double, true)
-EIGEN_MKL_TRMM_SPECIALIZE(double, false)
-EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
-EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
-EIGEN_MKL_TRMM_SPECIALIZE(float, true)
-EIGEN_MKL_TRMM_SPECIALIZE(float, false)
-EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
-EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
-
-// implements col-major += alpha * op(triangular) * op(general)
-#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template <typename Index, int Mode, \
-          int LhsStorageOrder, bool ConjugateLhs, \
-          int RhsStorageOrder, bool ConjugateRhs> \
-struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
-         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
-{ \
-  enum { \
-    IsLower = (Mode&Lower) == Lower, \
-    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
-    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
-    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
-    LowUp = IsLower ? Lower : Upper, \
-    conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
-  }; \
-\
-  static EIGEN_DONT_INLINE void run( \
-    Index _rows, Index _cols, Index _depth, \
-    const EIGTYPE* _lhs, Index lhsStride, \
-    const EIGTYPE* _rhs, Index rhsStride, \
-    EIGTYPE* res,        Index resStride, \
-    EIGTYPE alpha) \
-  { \
-   Index diagSize  = (std::min)(_rows,_depth); \
-   Index rows      = IsLower ? _rows : diagSize; \
-   Index depth     = IsLower ? diagSize : _depth; \
-   Index cols      = _cols; \
-\
-   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
-   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
-\
-/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
-   if (rows != depth) { \
-\
-     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
-\
-     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
-     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
-       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
-       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
-           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
-     /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
-     } else { \
-     /* Make sense to call GEMM */ \
-       Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
-       MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
-       MKL_INT aStride = aa_tmp.outerStride(); \
-       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
-       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
-       rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
-\
-     /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
-     } \
-     return; \
-   } \
-   char side = 'L', transa, uplo, diag = 'N'; \
-   EIGTYPE *b; \
-   const EIGTYPE *a; \
-   MKL_INT m, n, k, lda, ldb, ldc; \
-   MKLTYPE alpha_; \
-\
-/* Set alpha_*/ \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
-\
-/* Set m, n */ \
-   m = (MKL_INT)diagSize; \
-   n = (MKL_INT)cols; \
-\
-/* Set trans */ \
-   transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
-\
-/* Set b, ldb */ \
-   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
-   MatrixX##EIGPREFIX b_tmp; \
-\
-   if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
-   b = b_tmp.data(); \
-   ldb = b_tmp.outerStride(); \
-\
-/* Set uplo */ \
-   uplo = IsLower ? 'L' : 'U'; \
-   if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
-/* Set a, lda */ \
-   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
-   MatrixLhs a_tmp; \
-\
-   if ((conjA!=0) || (SetDiag==0)) { \
-     if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
-     if (IsZeroDiag) \
-       a_tmp.diagonal().setZero(); \
-     else if (IsUnitDiag) \
-       a_tmp.diagonal().setOnes();\
-     a = a_tmp.data(); \
-     lda = a_tmp.outerStride(); \
-   } else { \
-     a = _lhs; \
-     lda = lhsStride; \
-   } \
-   /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
-/* call ?trmm*/ \
-   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
-\
-/* Add op(a_triangular)*b into res*/ \
-   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
-   res_tmp=res_tmp+b_tmp; \
-  } \
-};
-
-EIGEN_MKL_TRMM_L(double, double, d, d)
-EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMM_L(float, float, f, s)
-EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
-
-// implements col-major += alpha * op(general) * op(triangular)
-#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template <typename Index, int Mode, \
-          int LhsStorageOrder, bool ConjugateLhs, \
-          int RhsStorageOrder, bool ConjugateRhs> \
-struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
-         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
-{ \
-  enum { \
-    IsLower = (Mode&Lower) == Lower, \
-    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
-    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
-    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
-    LowUp = IsLower ? Lower : Upper, \
-    conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
-  }; \
-\
-  static EIGEN_DONT_INLINE void run( \
-    Index _rows, Index _cols, Index _depth, \
-    const EIGTYPE* _lhs, Index lhsStride, \
-    const EIGTYPE* _rhs, Index rhsStride, \
-    EIGTYPE* res,        Index resStride, \
-    EIGTYPE alpha) \
-  { \
-   Index diagSize  = (std::min)(_cols,_depth); \
-   Index rows      = _rows; \
-   Index depth     = IsLower ? _depth : diagSize; \
-   Index cols      = IsLower ? diagSize : _cols; \
-\
-   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
-   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
-\
-/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
-   if (cols != depth) { \
-\
-     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
-\
-     if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
-     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
-       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
-       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
-           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
-       /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
-     } else { \
-     /* Make sense to call GEMM */ \
-       Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
-       MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
-       MKL_INT aStride = aa_tmp.outerStride(); \
-       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
-       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
-       rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking); \
-\
-     /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
-     } \
-     return; \
-   } \
-   char side = 'R', transa, uplo, diag = 'N'; \
-   EIGTYPE *b; \
-   const EIGTYPE *a; \
-   MKL_INT m, n, k, lda, ldb, ldc; \
-   MKLTYPE alpha_; \
-\
-/* Set alpha_*/ \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
-\
-/* Set m, n */ \
-   m = (MKL_INT)rows; \
-   n = (MKL_INT)diagSize; \
-\
-/* Set trans */ \
-   transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
-\
-/* Set b, ldb */ \
-   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
-   MatrixX##EIGPREFIX b_tmp; \
-\
-   if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
-   b = b_tmp.data(); \
-   ldb = b_tmp.outerStride(); \
-\
-/* Set uplo */ \
-   uplo = IsLower ? 'L' : 'U'; \
-   if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
-/* Set a, lda */ \
-   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
-   MatrixRhs a_tmp; \
-\
-   if ((conjA!=0) || (SetDiag==0)) { \
-     if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
-     if (IsZeroDiag) \
-       a_tmp.diagonal().setZero(); \
-     else if (IsUnitDiag) \
-       a_tmp.diagonal().setOnes();\
-     a = a_tmp.data(); \
-     lda = a_tmp.outerStride(); \
-   } else { \
-     a = _rhs; \
-     lda = rhsStride; \
-   } \
-   /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
-/* call ?trmm*/ \
-   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
-\
-/* Add op(a_triangular)*b into res*/ \
-   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
-   res_tmp=res_tmp+b_tmp; \
-  } \
-};
-
-EIGEN_MKL_TRMM_R(double, double, d, d)
-EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMM_R(float, float, f, s)
-EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
-
-} // end namespace internal
-
-#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,25 +27,24 @@

 namespace internal {

-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
-struct triangular_matrix_vector_product;
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
+struct product_triangular_matrix_vector;

-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
-struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
+struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor>
 {
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  enum {
    IsLower = ((Mode&Lower)==Lower),
-    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
-    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag
  };
-  static EIGEN_DONT_INLINE  void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+  static EIGEN_DONT_INLINE  void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
                                     const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
  {
+    EIGEN_UNUSED_VARIABLE(resIncr);
+    eigen_assert(resIncr==1);
+    
    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
-    Index size = (std::min)(_rows,_cols);
-    Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
-    Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;

    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -58,57 +57,48 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
    typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
    ResMap res(_res,rows);

-    for (Index pi=0; pi<size; pi+=PanelWidth)
+    for (Index pi=0; pi<cols; pi+=PanelWidth)
    {
-      Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
+      Index actualPanelWidth = std::min(PanelWidth, cols-pi);
      for (Index k=0; k<actualPanelWidth; ++k)
      {
        Index i = pi + k;
-        Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
+        Index s = IsLower ? (HasUnitDiag ? i+1 : i ) : pi;
        Index r = IsLower ? actualPanelWidth-k : k+1;
-        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
+        if ((!HasUnitDiag) || (--r)>0)
          res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
        if (HasUnitDiag)
          res.coeffRef(i) += alpha * cjRhs.coeff(i);
      }
-      Index r = IsLower ? rows - pi - actualPanelWidth : pi;
+      Index r = IsLower ? cols - pi - actualPanelWidth : pi;
      if (r>0)
      {
        Index s = IsLower ? pi+actualPanelWidth : 0;
-        general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
+        general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
            r, actualPanelWidth,
            &lhs.coeffRef(s,pi), lhsStride,
            &rhs.coeffRef(pi), rhsIncr,
            &res.coeffRef(s), resIncr, alpha);
      }
    }
-    if((!IsLower) && cols>size)
-    {
-      general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
-          rows, cols-size,
-          &lhs.coeffRef(0,size), lhsStride,
-          &rhs.coeffRef(size), rhsIncr,
-          _res, resIncr, alpha);
-    }
  }
 };

-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
-struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
+struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor>
 {
  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  enum {
    IsLower = ((Mode&Lower)==Lower),
-    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
-    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag
  };
-  static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+  static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
                  const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
  {
+    eigen_assert(rhsIncr==1);
+    EIGEN_UNUSED_VARIABLE(rhsIncr);
+    
    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
-    Index diagSize = (std::min)(_rows,_cols);
-    Index rows = IsLower ? _rows : diagSize;
-    Index cols = IsLower ? diagSize : _cols;

    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -121,15 +111,15 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
    typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
    ResMap res(_res,rows,InnerStride<>(resIncr));
    
-    for (Index pi=0; pi<diagSize; pi+=PanelWidth)
+    for (Index pi=0; pi<cols; pi+=PanelWidth)
    {
-      Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
+      Index actualPanelWidth = std::min(PanelWidth, cols-pi);
      for (Index k=0; k<actualPanelWidth; ++k)
      {
        Index i = pi + k;
-        Index s = IsLower ? pi  : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
+        Index s = IsLower ? pi  : (HasUnitDiag ? i+1 : i);
        Index r = IsLower ? k+1 : actualPanelWidth-k;
-        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
+        if ((!HasUnitDiag) || (--r)>0)
          res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
        if (HasUnitDiag)
          res.coeffRef(i) += alpha * cjRhs.coeff(i);
@@ -138,21 +128,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
      if (r>0)
      {
        Index s = IsLower ? 0 : pi + actualPanelWidth;
-        general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
+        general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
            actualPanelWidth, r,
            &lhs.coeffRef(pi,s), lhsStride,
            &rhs.coeffRef(s), rhsIncr,
            &res.coeffRef(pi), resIncr, alpha);
      }
    }
-    if(IsLower && rows>diagSize)
-    {
-      general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
-            rows-diagSize, cols,
-            &lhs.coeffRef(diagSize,0), lhsStride,
-            &rhs.coeffRef(0), rhsIncr,
-            &res.coeffRef(diagSize), resIncr, alpha);
-    }
  }
 };

@@ -203,8 +185,8 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
  template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
-
-    typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
+    
+    typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
    Transpose<Dest> dstT(dst);
    internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
      TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
@@ -232,8 +214,8 @@ template<> struct trmv_selector<ColMajor>
    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;

-    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+    const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
+    const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());

    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
@@ -253,15 +235,23 @@ template<> struct trmv_selector<ColMajor>
    
    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

-    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
-                                                  evalToDest ? dest.data() : static_dest.data());
-
-    if(!evalToDest)
+    ResScalar* actualDestPtr;
+    bool freeDestPtr = false;
+    if (evalToDest)
+    {
+      actualDestPtr = dest.data();
+    }
+    else
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
+      if((actualDestPtr = static_dest.data())==0)
+      {
+        freeDestPtr = true;
+        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
+      }
      if(!alphaIsCompatible)
      {
        MappedDest(actualDestPtr, dest.size()).setZero();
@@ -271,7 +261,7 @@ template<> struct trmv_selector<ColMajor>
        MappedDest(actualDestPtr, dest.size()) = dest;
    }
    
-    internal::triangular_matrix_vector_product
+    internal::product_triangular_matrix_vector
      <Index,Mode,
       LhsScalar, LhsBlasTraits::NeedToConjugate,
       RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -287,6 +277,7 @@ template<> struct trmv_selector<ColMajor>
        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
      else
        dest = MappedDest(actualDestPtr, dest.size());
+      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
    }
  }
 };
@@ -319,19 +310,27 @@ template<> struct trmv_selector<RowMajor>

    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

-    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
-        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
-
-    if(!DirectlyUseRhs)
+    RhsScalar* actualRhsPtr;
+    bool freeRhsPtr = false;
+    if (DirectlyUseRhs)
+    {
+      actualRhsPtr = const_cast<RhsScalar*>(actualRhs.data());
+    }
+    else
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
+      if((actualRhsPtr = static_rhs.data())==0)
+      {
+        freeRhsPtr = true;
+        actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
+      }
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }
    
-    internal::triangular_matrix_vector_product
+    internal::product_triangular_matrix_vector
      <Index,Mode,
       LhsScalar, LhsBlasTraits::NeedToConjugate,
       RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -341,6 +340,8 @@ template<> struct trmv_selector<RowMajor>
            actualRhsPtr,1,
            dest.data(),dest.innerStride(),
            actualAlpha);
+
+    if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
  }
 };

--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -1,245 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   Triangular matrix-vector product functionality based on ?TRMV.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
-#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
-
-namespace internal {
-
-/**********************************************************************
-* This file implements triangular matrix-vector multiplication using BLAS
-**********************************************************************/
-
-// trmv/hemv specialization
-
-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
-struct triangular_matrix_vector_product_trmv :
-  triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
-
-#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
-template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
-struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
-                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
-      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
-        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
-  } \
-}; \
-template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
-struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
-                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
-      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
-        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
-  } \
-};
-
-EIGEN_MKL_TRMV_SPECIALIZE(double)
-EIGEN_MKL_TRMV_SPECIALIZE(float)
-EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
-EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
-
-// implements col-major: res += alpha * op(triangular) * vector
-#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
-struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
-  enum { \
-    IsLower = (Mode&Lower) == Lower, \
-    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
-    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
-    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
-    LowUp = IsLower ? Lower : Upper \
-  }; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
-                             const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
- { \
-   if (ConjLhs || IsZeroDiag) { \
-     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
-       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
-     return; \
-   }\
-   Index size = (std::min)(_rows,_cols); \
-   Index rows = IsLower ? _rows : size; \
-   Index cols = IsLower ? size : _cols; \
-\
-   typedef VectorX##EIGPREFIX VectorRhs; \
-   EIGTYPE *x, *y;\
-\
-/* Set x*/ \
-   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
-   VectorRhs x_tmp; \
-   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
-   x = x_tmp.data(); \
-\
-/* Square part handling */\
-\
-   char trans, uplo, diag; \
-   MKL_INT m, n, k, lda, incx, incy; \
-   EIGTYPE const *a; \
-   MKLTYPE alpha_, beta_; \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
-\
-/* Set m, n */ \
-   n = (MKL_INT)size; \
-   lda = lhsStride; \
-   incx = 1; \
-   incy = resIncr; \
-\
-/* Set uplo, trans and diag*/ \
-   trans = 'N'; \
-   uplo = IsLower ? 'L' : 'U'; \
-   diag = IsUnitDiag ? 'U' : 'N'; \
-\
-/* call ?TRMV*/ \
-      std::cout << "TRMV: CM\n";\
-   MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
-\
-/* Add op(a_tr)rhs into res*/ \
-   MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
-/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
-   if (size<(std::max)(rows,cols)) { \
-     typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
-     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
-     x = x_tmp.data(); \
-     if (size<rows) { \
-       y = _res + size*resIncr; \
-       a = _lhs + size; \
-       m = rows-size; \
-       n = size; \
-     } \
-     if (size<cols) { \
-       x += size; \
-       y = _res; \
-       a = _lhs + size*lda; \
-       m = size; \
-       n = cols-size; \
-     } \
-     MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
-   } \
-  } \
-};
-
-EIGEN_MKL_TRMV_CM(double, double, d, d)
-EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMV_CM(float, float, f, s)
-EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
-
-// implements row-major: res += alpha * op(triangular) * vector
-#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
-template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
-struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
-  enum { \
-    IsLower = (Mode&Lower) == Lower, \
-    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
-    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
-    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
-    LowUp = IsLower ? Lower : Upper \
-  }; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
-                             const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
- { \
-   if (IsZeroDiag) { \
-     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
-       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
-     return; \
-   }\
-   Index size = (std::min)(_rows,_cols); \
-   Index rows = IsLower ? _rows : size; \
-   Index cols = IsLower ? size : _cols; \
-\
-   typedef VectorX##EIGPREFIX VectorRhs; \
-   EIGTYPE *x, *y;\
-\
-/* Set x*/ \
-   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
-   VectorRhs x_tmp; \
-   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
-   x = x_tmp.data(); \
-\
-/* Square part handling */\
-\
-   char trans, uplo, diag; \
-   MKL_INT m, n, k, lda, incx, incy; \
-   EIGTYPE const *a; \
-   MKLTYPE alpha_, beta_; \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
-   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
-\
-/* Set m, n */ \
-   n = (MKL_INT)size; \
-   lda = lhsStride; \
-   incx = 1; \
-   incy = resIncr; \
-\
-/* Set uplo, trans and diag*/ \
-   trans = ConjLhs ? 'C' : 'T'; \
-   uplo = IsLower ? 'U' : 'L'; \
-   diag = IsUnitDiag ? 'U' : 'N'; \
-\
-/* call ?TRMV*/ \
-      std::cout << "TRMV: RM\n";\
-   MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
-\
-/* Add op(a_tr)rhs into res*/ \
-   MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
-/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
-   if (size<(std::max)(rows,cols)) { \
-     typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
-     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
-     x = x_tmp.data(); \
-     if (size<rows) { \
-       y = _res + size*resIncr; \
-       a = _lhs + size*lda; \
-       m = rows-size; \
-       n = size; \
-     } \
-     if (size<cols) { \
-       x += size; \
-       y = _res; \
-       a = _lhs + size; \
-       m = size; \
-       n = cols-size; \
-     } \
-     MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
-   } \
-  } \
-};
-
-EIGEN_MKL_TRMV_RM(double, double, d, d)
-EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
-EIGEN_MKL_TRMV_RM(float, float, f, s)
-EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
-
-} //end of namespase
-
-#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -70,48 +70,38 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);

+    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
    Scalar* blockB = allocatedBlockB + sizeW;
-    Scalar* blockW = allocatedBlockB;

    conj_if<Conjugate> conj;
    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;

-    // the goal here is to subdivise the Rhs panels such that we keep some cache
-    // coherence when accessing the rhs elements
-    std::ptrdiff_t l1, l2;
-    manage_caching_sizes(GetAction, &l1, &l2);
-    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
-    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
-
    for(Index k2=IsLower ? 0 : size;
        IsLower ? k2<size : k2>0;
        IsLower ? k2+=kc : k2-=kc)
    {
-      const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
+      const Index actual_kc = std::min(IsLower ? size-k2 : k2, kc);

      // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
      // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
      // A11 (the triangular part) and A21 the remaining rectangular part.
      // Then the high level algorithm is:
      //  - B = R1                    => general block copy (done during the next step)
-      //  - R1 = A11^-1 B             => tricky part
+      //  - R1 = L1^-1 B              => tricky part
      //  - update B from the new R1  => actually this has to be performed continuously during the above step
-      //  - R2 -= A21 * B             => GEPP
+      //  - R2 = L2 * B               => GEPP

-      // The tricky part: compute R1 = A11^-1 B while updating B from R1
-      // The idea is to split A11 into multiple small vertical panels.
-      // Each panel can be split into a small triangular part T1k which is processed without optimization,
-      // and the remaining small part T2k which is processed using gebp with appropriate block strides
-      for(Index j2=0; j2<cols; j2+=subcols)
+      // The tricky part: compute R1 = L1^-1 B while updating B from R1
+      // The idea is to split L1 into multiple small vertical panels.
+      // Each panel can be split into a small triangular part A1 which is processed without optimization,
+      // and the remaining small part A2 which is processed using gebp with appropriate block strides
      {
-        Index actual_cols = (std::min)(cols-j2,subcols);
-        // for each small vertical panels [T1k^T, T2k^T]^T of lhs
+        // for each small vertical panels of lhs
        for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
        {
          Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
@@ -124,11 +114,11 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
            Index rs = actualPanelWidth - k - 1; // remaining size

            Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
-            for (Index j=j2; j<j2+actual_cols; ++j)
+            for (Index j=0; j<cols; ++j)
            {
              if (TriStorageOrder==RowMajor)
              {
-                Scalar b(0);
+                Scalar b = 0;
                const Scalar* l = &tri(i,s);
                Scalar* r = &other(s,j);
                for (Index i3=0; i3<k; ++i3)
@@ -153,7 +143,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
          Index blockBOffset = IsLower ? k1 : lengthTarget;

          // update the respective rows of B from other
-          pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);
+          pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);

          // GEBP
          if (lengthTarget>0)
@@ -162,19 +152,19 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO

            pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);

-            gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
-                        actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
+            gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
+                        actualPanelWidth, actual_kc, 0, blockBOffset);
          }
        }
      }
-      
-      // R2 -= A21 * B => GEPP
+
+      // R2 = A2 * B => GEPP
      {
        Index start = IsLower ? k2+kc : 0;
        Index end   = IsLower ? size : k2-kc;
        for(Index i2=start; i2<end; i2+=mc)
        {
-          const Index actual_mc = (std::min)(mc,end-i2);
+          const Index actual_mc = std::min(mc,end-i2);
          if (actual_mc>0)
          {
            pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
@@ -184,6 +174,9 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
        }
      }
    }
+
+    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

@@ -216,10 +209,10 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
    Index nc = rows;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);

+    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*size;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
+    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
    Scalar* blockB = allocatedBlockB + sizeW;

    conj_if<Conjugate> conj;
@@ -232,7 +225,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
        IsLower ? k2>0 : k2<size;
        IsLower ? k2-=kc : k2+=kc)
    {
-      const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
+      const Index actual_kc = std::min(IsLower ? k2 : size-k2, kc);
      Index actual_k2 = IsLower ? k2-actual_kc : k2 ;

      Index startPanel = IsLower ? 0 : k2+actual_kc;
@@ -261,7 +254,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage

      for(Index i2=0; i2<rows; i2+=mc)
      {
-        const Index actual_mc = (std::min)(mc,rows-i2);
+        const Index actual_mc = std::min(mc,rows-i2);

        // triangular solver kernel
        {
@@ -321,6 +314,9 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
                      -1, -1, 0, 0, allocatedBlockB);
      }
    }
+
+    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
+    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
  }
 };

--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -1,151 +0,0 @@
-/*
- Copyright (c) 2011, Intel Corporation. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without modification,
- are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors may
-   be used to endorse or promote products derived from this software without
-   specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
- *   Triangular matrix * matrix product functionality based on ?TRMM.
- ********************************************************************************
-*/
-
-#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
-#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
-
-namespace internal {
-
-// implements LeftSide op(triangular)^-1 * general
-#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
-template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
-struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
-{ \
-  enum { \
-    IsLower = (Mode&Lower) == Lower, \
-    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
-    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
-    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
-  }; \
-  static EIGEN_DONT_INLINE void run( \
-      Index size, Index otherSize, \
-      const EIGTYPE* _tri, Index triStride, \
-      EIGTYPE* _other, Index otherStride) \
-  { \
-   MKL_INT m = size, n = otherSize, lda, ldb; \
-   char side = 'L', uplo, diag='N', transa; \
-   /* Set alpha_ */ \
-   MKLTYPE alpha; \
-   EIGTYPE myone(1); \
-   assign_scalar_eig2mkl(alpha, myone); \
-   ldb = otherStride;\
-\
-   const EIGTYPE *a; \
-/* Set trans */ \
-   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
-/* Set uplo */ \
-   uplo = IsLower ? 'L' : 'U'; \
-   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
-/* Set a, lda */ \
-   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
-   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
-   MatrixTri a_tmp; \
-\
-   if (conjA) { \
-     a_tmp = tri.conjugate(); \
-     a = a_tmp.data(); \
-     lda = a_tmp.outerStride(); \
-   } else { \
-     a = _tri; \
-     lda = triStride; \
-   } \
-   if (IsUnitDiag) diag='U'; \
-/* call ?trsm*/ \
-   MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
- } \
-};
-
-EIGEN_MKL_TRSM_L(double, double, d)
-EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_TRSM_L(float, float, s)
-EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
-
-
-// implements RightSide general * op(triangular)^-1
-#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
-template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
-struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
-{ \
-  enum { \
-    IsLower = (Mode&Lower) == Lower, \
-    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
-    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
-    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
-  }; \
-  static EIGEN_DONT_INLINE void run( \
-      Index size, Index otherSize, \
-      const EIGTYPE* _tri, Index triStride, \
-      EIGTYPE* _other, Index otherStride) \
-  { \
-   MKL_INT m = otherSize, n = size, lda, ldb; \
-   char side = 'R', uplo, diag='N', transa; \
-   /* Set alpha_ */ \
-   MKLTYPE alpha; \
-   EIGTYPE myone(1); \
-   assign_scalar_eig2mkl(alpha, myone); \
-   ldb = otherStride;\
-\
-   const EIGTYPE *a; \
-/* Set trans */ \
-   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
-/* Set uplo */ \
-   uplo = IsLower ? 'L' : 'U'; \
-   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
-/* Set a, lda */ \
-   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
-   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
-   MatrixTri a_tmp; \
-\
-   if (conjA) { \
-     a_tmp = tri.conjugate(); \
-     a = a_tmp.data(); \
-     lda = a_tmp.outerStride(); \
-   } else { \
-     a = _tri; \
-     lda = triStride; \
-   } \
-   if (IsUnitDiag) diag='U'; \
-/* call ?trsm*/ \
-   MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
-   /*std::cout << "TRMS_L specialization!\n";*/ \
- } \
-};
-
-EIGEN_MKL_TRSM_R(double, double, d)
-EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_TRSM_R(float, float, s)
-EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
-
-
-} // end namespace internal
-
-#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
--- a/Eigen/src/Core/products/TriangularSolverVector.h
+++ b/Eigen/src/Core/products/TriangularSolverVector.h
@@ -60,7 +60,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
        IsLower ? pi<size : pi>0;
        IsLower ? pi+=PanelWidth : pi-=PanelWidth)
    {
-      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
+      Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);

      Index r = IsLower ? pi : size - pi; // remaining size
      if (r > 0)
@@ -114,7 +114,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
        IsLower ? pi<size : pi>0;
        IsLower ? pi+=PanelWidth : pi-=PanelWidth)
    {
-      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
+      Index actualPanelWidth = std::min(IsLower ? size - pi : pi, PanelWidth);
      Index startBlock = IsLower ? pi : pi-actualPanelWidth;
      Index endBlock = IsLower ? pi + actualPanelWidth : 0;

--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -47,7 +47,7 @@ template<
  int ResStorageOrder>
 struct general_matrix_matrix_product;

-template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized>
+template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
 struct general_matrix_vector_product;


@@ -56,15 +56,11 @@ template<bool Conjugate> struct conj_if;
 template<> struct conj_if<true> {
  template<typename T>
  inline T operator()(const T& x) { return conj(x); }
-  template<typename T>
-  inline T pconj(const T& x) { return internal::pconj(x); }
 };

 template<> struct conj_if<false> {
  template<typename T>
  inline const T& operator()(const T& x) { return x; }
-  template<typename T>
-  inline const T& pconj(const T& x) { return x; }
 };

 template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
@@ -122,11 +118,11 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
 };

 template<typename From,typename To> struct get_factor {
-  static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
+  EIGEN_STRONG_INLINE static To run(const From& x) { return x; }
 };

 template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
-  static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
+  EIGEN_STRONG_INLINE static typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
 };

 // Lightweight helper class to access matrix coefficients.
@@ -179,7 +175,7 @@ template<typename XprType> struct blas_traits
    ExtractType,
    typename _ExtractType::PlainObject
    >::type DirectLinearAccessType;
-  static inline ExtractType extract(const XprType& x) { return x; }
+  static inline const ExtractType extract(const XprType& x) { return x; }
  static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
 };

@@ -196,7 +192,7 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
    IsComplex = NumTraits<Scalar>::IsComplex,
    NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
  };
-  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+  static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
 };

@@ -208,7 +204,7 @@ struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
  typedef blas_traits<NestedXpr> Base;
  typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
  typedef typename Base::ExtractType ExtractType;
-  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+  static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x)
  { return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
 };
@@ -221,7 +217,7 @@ struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
  typedef blas_traits<NestedXpr> Base;
  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
  typedef typename Base::ExtractType ExtractType;
-  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+  static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x)
  { return - Base::extractScalarFactor(x.nestedExpression()); }
 };
@@ -243,7 +239,7 @@ struct blas_traits<Transpose<NestedXpr> >
  enum {
    IsTransposed = Base::IsTransposed ? 0 : 1
  };
-  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+  static inline const ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
 };

@@ -256,7 +252,7 @@ template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectA
 struct extract_data_selector {
  static const typename T::Scalar* run(const T& m)
  {
-    return blas_traits<T>::extract(m).data();
+    return const_cast<typename T::Scalar*>(&blas_traits<T>::extract(m).coeffRef(0,0)); // FIXME this should be .data()
  }
 };

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Gael Guennebaud	72ffb63165	fix compilation for old but not so old versions of glew	2011-03-18 10:26:21 +01:00
Benoit Jacob	67e24b85a4	bump	2011-03-18 05:13:34 -04:00