bump to 3.1.3

Prevent calling .norm() on integer matrices in the unit tests.
(transplanted from b5d8299ee7 )
2026-04-10 11:34:33 +08:00 · 2013-04-16 09:38:40 +02:00 · 2013-02-28 12:33:34 +01:00 · 2013-02-27 08:07:18 +01:00 · 2013-03-01 00:03:28 +01:00 · 2013-04-11 19:48:34 +02:00
1065 changed files with 53634 additions and 116561 deletions
--- a/.hgeol
+++ b/.hgeol
@@ -1,9 +1,6 @@
 [patterns]
-*.sh = LF
-*.MINPACK = CRLF
 scripts/*.in = LF
 debug/msvc/*.dat = CRLF
-debug/msvc/*.natvis = CRLF
 unsupported/test/mpreal/*.* = CRLF
 ** = native

--- a/.hgignore
+++ b/.hgignore
@@ -30,5 +30,3 @@ log
 patch
 a
 a.*
-lapack/testing
-lapack/reference
--- a/.krazy
+++ b/.krazy
@@ -0,0 +1,3 @@
+SKIP /disabled/
+SKIP /bench/
+SKIP /build/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(Eigen)

-cmake_minimum_required(VERSION 2.8.4)
+cmake_minimum_required(VERSION 2.6.2)

 # guard against in-source builds

@@ -105,82 +105,26 @@ if(EIGEN_DEFAULT_TO_ROW_MAJOR)
  add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
 endif()

+add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
+
 set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")

-macro(ei_add_cxx_compiler_flag FLAG)
-  string(REGEX REPLACE "-" "" SFLAG1 ${FLAG})
-  string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1})
-  check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
-  if(COMPILER_SUPPORT_${SFLAG})
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
-  endif()
-endmacro(ei_add_cxx_compiler_flag)
-
-if(NOT MSVC)
-  # We assume that other compilers are partly compatible with GNUCC
-  
-#  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
+if(CMAKE_COMPILER_IS_GNUCXX)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
  set(CMAKE_CXX_FLAGS_DEBUG "-g3")
  set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
-  
-  # clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag
-  # adding -Werror turns such warnings into errors
-  check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR)
-  if(COMPILER_SUPPORT_WERROR)
-    set(CMAKE_REQUIRED_FLAGS "-Werror")
-  endif()
-  
-  ei_add_cxx_compiler_flag("-pedantic")
-  ei_add_cxx_compiler_flag("-Wall")
-  ei_add_cxx_compiler_flag("-Wextra")
-  #ei_add_cxx_compiler_flag("-Weverything")              # clang
-  
-  ei_add_cxx_compiler_flag("-Wundef")
-  ei_add_cxx_compiler_flag("-Wcast-align")
-  ei_add_cxx_compiler_flag("-Wchar-subscripts")
-  ei_add_cxx_compiler_flag("-Wnon-virtual-dtor")
-  ei_add_cxx_compiler_flag("-Wunused-local-typedefs")
-  ei_add_cxx_compiler_flag("-Wpointer-arith")
-  ei_add_cxx_compiler_flag("-Wwrite-strings")
-  ei_add_cxx_compiler_flag("-Wformat-security")
-  ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
-  ei_add_cxx_compiler_flag("-Wenum-conversion")
-  ei_add_cxx_compiler_flag("-Wc++11-extensions")
-  
-  # -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6
-  # if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0"))
-  if(NOT CMAKE_COMPILER_IS_GNUCXX)
-    ei_add_cxx_compiler_flag("-Wshadow")
-  endif()
-  
-  ei_add_cxx_compiler_flag("-Wno-psabi")
-  ei_add_cxx_compiler_flag("-Wno-variadic-macros")
-  ei_add_cxx_compiler_flag("-Wno-long-long")
-  
-  ei_add_cxx_compiler_flag("-fno-check-new")
-  ei_add_cxx_compiler_flag("-fno-common")
-  ei_add_cxx_compiler_flag("-fstrict-aliasing")
-  ei_add_cxx_compiler_flag("-wd981")                    # disable ICC's "operands are evaluated in unspecified order" remark
-  ei_add_cxx_compiler_flag("-wd2304")                   # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor
-  
-  
-  # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails
-  # Moreover we should not set both -strict-ansi and -ansi
-  check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
-  ei_add_cxx_compiler_flag("-Qunused-arguments")        # disable clang warning: argument unused during compilation: '-ansi'
-  
-  if(COMPILER_SUPPORT_STRICTANSI)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi")
-  else()
-    ei_add_cxx_compiler_flag("-ansi")
+
+  check_cxx_compiler_flag("-Wno-variadic-macros" COMPILER_SUPPORT_WNOVARIADICMACRO)
+  if(COMPILER_SUPPORT_WNOVARIADICMACRO)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macros")
  endif()

-  if(ANDROID_NDK)
-    ei_add_cxx_compiler_flag("-pie")
-    ei_add_cxx_compiler_flag("-fPIE")
+  check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA)
+  if(COMPILER_SUPPORT_WEXTRA)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
  endif()
-  
-  set(CMAKE_REQUIRED_FLAGS "")
+
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")

  option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
  if(EIGEN_TEST_SSE2)
@@ -212,49 +156,18 @@ if(NOT MSVC)
    message(STATUS "Enabling SSE4.2 in tests/examples")
  endif()

-  option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF)
-  if(EIGEN_TEST_AVX)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
-    message(STATUS "Enabling AVX in tests/examples")
-  endif()
-
-  option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
-  if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
-    message(STATUS "Enabling FMA in tests/examples")
-  endif()
-
  option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF)
  if(EIGEN_TEST_ALTIVEC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
    message(STATUS "Enabling AltiVec in tests/examples")
  endif()

-  option(EIGEN_TEST_VSX "Enable/Disable VSX in tests/examples" OFF)
-  if(EIGEN_TEST_VSX)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mvsx")
-    message(STATUS "Enabling VSX in tests/examples")
-  endif()
-
  option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
  if(EIGEN_TEST_NEON)
-    if(EIGEN_TEST_FMA)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4")
-    else()
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
-    endif()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
    message(STATUS "Enabling NEON in tests/examples")
  endif()

-  option(EIGEN_TEST_NEON64 "Enable/Disable Neon in tests/examples" OFF)
-  if(EIGEN_TEST_NEON64)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-    message(STATUS "Enabling NEON in tests/examples")
-  endif()
-
-
-
  check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP)
  if(COMPILER_SUPPORT_OPENMP)
    option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF)
@@ -264,8 +177,9 @@ if(NOT MSVC)
    endif()
  endif()

-else(NOT MSVC)
+endif(CMAKE_COMPILER_IS_GNUCXX)

+if(MSVC)
  # C4127 - conditional expression is constant
  # C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively)
  #         We can disable this warning in the unit tests since it is clear that it occurs
@@ -295,7 +209,7 @@ else(NOT MSVC)
    endif(NOT CMAKE_CL_64)
    message(STATUS "Enabling SSE2 in tests/examples")
  endif(EIGEN_TEST_SSE2)
-endif(NOT MSVC)
+endif(MSVC)

 option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
 option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
@@ -331,13 +245,7 @@ if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT)
  message(STATUS "Disabling alignment in tests/examples")
 endif()

-option(EIGEN_TEST_NO_EXCEPTIONS "Disables C++ exceptions" OFF)
-if(EIGEN_TEST_NO_EXCEPTIONS)
-  ei_add_cxx_compiler_flag("-fno-exceptions")
-  message(STATUS "Disabling exceptions in tests/examples")
-endif()
-
-option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF)
+option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)

 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})

@@ -400,7 +308,6 @@ add_subdirectory(Eigen)
 add_subdirectory(doc EXCLUDE_FROM_ALL)

 include(EigenConfigureTesting)
-
 # fixme, not sure this line is still needed:
 enable_testing() # must be called from the root CMakeLists, see man page

@@ -435,8 +342,6 @@ if(NOT WIN32)
  add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
 endif(NOT WIN32)

-configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY)
-
 ei_testing_print_summary()

 message(STATUS "")
@@ -464,7 +369,6 @@ if(cmake_generator_tolower MATCHES "makefile")
  message(STATUS "make check    | Build and run the unit-tests. Read this page:")
  message(STATUS "              |   http://eigen.tuxfamily.org/index.php?title=Tests")
  message(STATUS "make blas     | Build BLAS library (not the same thing as Eigen)")
-  message(STATUS "make uninstall| Removes files installed by make install")
  message(STATUS "--------------+--------------------------------------------------------------")
 else()
  message(STATUS "To build/run the unit tests, read this page:")
@@ -472,35 +376,3 @@ else()
 endif()

 message(STATUS "")
-
-set ( EIGEN_CONFIG_CMAKE_PATH
-      lib${LIB_SUFFIX}/cmake/eigen3
-      CACHE PATH "The directory where the CMake files are installed"
-    )
-if ( NOT IS_ABSOLUTE EIGEN_CONFIG_CMAKE_PATH )
-  set ( EIGEN_CONFIG_CMAKE_PATH ${CMAKE_INSTALL_PREFIX}/${EIGEN_CONFIG_CMAKE_PATH} )
-endif ()
-
-set ( EIGEN_USE_FILE ${EIGEN_CONFIG_CMAKE_PATH}/UseEigen3.cmake )
-set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} )
-set ( EIGEN_VERSION_MAJOR  ${EIGEN_WORLD_VERSION} )
-set ( EIGEN_VERSION_MINOR  ${EIGEN_MAJOR_VERSION} )
-set ( EIGEN_VERSION_PATCH  ${EIGEN_MINOR_VERSION} )
-set ( EIGEN_DEFINITIONS "")
-set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} )
-set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} )
-set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} )
-
-configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in
-                 ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
-                 @ONLY ESCAPE_QUOTES
-               )
-
-install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake
-                ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake
-          DESTINATION ${EIGEN_CONFIG_CMAKE_PATH}
-        )
-
-# Add uninstall target
-add_custom_target ( uninstall
-    COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake)
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -4,14 +4,10 @@
 ## # The following are required to uses Dart and the Cdash dashboard
 ##   ENABLE_TESTING()
 ##   INCLUDE(CTest)
-set(CTEST_PROJECT_NAME "Eigen")
+set(CTEST_PROJECT_NAME "Eigen3.1")
 set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")

 set(CTEST_DROP_METHOD "http")
 set(CTEST_DROP_SITE "manao.inria.fr")
-set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
-set(CTEST_DROP_SITE_CDASH TRUE)
-set(CTEST_PROJECT_SUBPROJECTS
-Official
-Unsupported
-)
+set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.1")
+set(CTEST_DROP_SITE_CDASH TRUE)
--- a/CTestCustom.cmake.in
+++ b/CTestCustom.cmake.in
@@ -1,3 +1,4 @@

-set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "2000")
-set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS   "2000")
+## A tribute to Dynamic!
+set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331")
+set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331")
--- a/Eigen/Array
+++ b/Eigen/Array
@@ -0,0 +1,11 @@
+#ifndef EIGEN_ARRAY_MODULE_H
+#define EIGEN_ARRAY_MODULE_H
+
+// include Core first to handle Eigen2 support macros
+#include "Core"
+
+#ifndef EIGEN2_SUPPORT
+  #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core.
+#endif
+
+#endif // EIGEN_ARRAY_MODULE_H
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -10,17 +10,16 @@
  *
  *
  * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
-  * Those decompositions are also accessible via the following methods:
-  *  - MatrixBase::llt()
+  * Those decompositions are accessible via the following MatrixBase methods:
+  *  - MatrixBase::llt(),
  *  - MatrixBase::ldlt()
-  *  - SelfAdjointView::llt()
-  *  - SelfAdjointView::ldlt()
  *
  * \code
  * #include <Eigen/Cholesky>
  * \endcode
  */

+#include "src/misc/Solve.h"
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
 #ifdef EIGEN_USE_LAPACKE
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -33,8 +33,12 @@ extern "C" {
  *
  */

+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
 #include "src/CholmodSupport/CholmodSupport.h"

+
 #include "src/Core/util/ReenableStupidWarnings.h"

 #endif // EIGEN_CHOLMODSUPPORT_MODULE_H
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -14,85 +14,37 @@
 // first thing Eigen does: stop the compiler from committing suicide
 #include "src/Core/util/DisableStupidWarnings.h"

-// Handle NVCC/CUDA
-#ifdef __CUDACC__
-  // Do not try asserts on CUDA!
-  #ifndef EIGEN_NO_DEBUG
-  #define EIGEN_NO_DEBUG
-  #endif
-
-  #ifdef EIGEN_INTERNAL_DEBUGGING
-  #undef EIGEN_INTERNAL_DEBUGGING
-  #endif
-
-  // Do not try to vectorize on CUDA!
-  #ifndef EIGEN_DONT_VECTORIZE
-  #define EIGEN_DONT_VECTORIZE
-  #endif
-
-  #ifdef EIGEN_EXCEPTIONS
-  #undef EIGEN_EXCEPTIONS
-  #endif
-  
-  // All functions callable from CUDA code must be qualified with __device__
-  #define EIGEN_DEVICE_FUNC __host__ __device__
-  
-#else
-  #define EIGEN_DEVICE_FUNC
-  
-#endif
-
-#if defined(__CUDA_ARCH__)
-  #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
-#else
-  #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
-#endif
-
-#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
-  #define EIGEN_EXCEPTIONS
-#endif
-
-#ifdef EIGEN_EXCEPTIONS
-  #include <new>
-#endif
-
 // then include this file where all our macros are defined. It's really important to do it first because
 // it's where we do all the alignment settings (platform detection and honoring the user's will if he
 // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
 #include "src/Core/util/Macros.h"

-// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
-// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
-#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
-  #pragma GCC optimize ("-fno-ipa-cp-clone")
-#endif
-
 #include <complex>

 // this include file manages BLAS and MKL related macros
 // and inclusion of their respective header files
 #include "src/Core/util/MKL_support.h"

-// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
-// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
-#if EIGEN_MAX_ALIGN_BYTES==0
+// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
+// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
+#if !EIGEN_ALIGN
  #ifndef EIGEN_DONT_VECTORIZE
    #define EIGEN_DONT_VECTORIZE
  #endif
 #endif

-#if EIGEN_COMP_MSVC
+#ifdef _MSC_VER
  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
-  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
+  #if (_MSC_VER >= 1500) // 2008 or later
    // Remember that usage of defined() in a #define is undefined by the standard.
    // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
-    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
+    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
    #endif
  #endif
 #else
  // Remember that usage of defined() in a #define is undefined by the standard
-  #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
+  #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) )
    #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
  #endif
 #endif
@@ -124,19 +76,6 @@
    #ifdef __SSE4_2__
      #define EIGEN_VECTORIZE_SSE4_2
    #endif
-    #ifdef __AVX__
-      #define EIGEN_VECTORIZE_AVX
-      #define EIGEN_VECTORIZE_SSE3
-      #define EIGEN_VECTORIZE_SSSE3
-      #define EIGEN_VECTORIZE_SSE4_1
-      #define EIGEN_VECTORIZE_SSE4_2
-    #endif
-    #ifdef __AVX2__
-      #define EIGEN_VECTORIZE_AVX2
-    #endif
-    #ifdef __FMA__
-      #define EIGEN_VECTORIZE_FMA
-    #endif

    // include files

@@ -148,39 +87,21 @@
    // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
    // notice that since these are C headers, the extern "C" is theoretically needed anyways.
    extern "C" {
-      // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
-      // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
-      #if EIGEN_COMP_ICC >= 1110
-        #include <immintrin.h>
-      #else
-        #include <emmintrin.h>
-        #include <xmmintrin.h>
-        #ifdef  EIGEN_VECTORIZE_SSE3
-        #include <pmmintrin.h>
-        #endif
-        #ifdef EIGEN_VECTORIZE_SSSE3
-        #include <tmmintrin.h>
-        #endif
-        #ifdef EIGEN_VECTORIZE_SSE4_1
-        #include <smmintrin.h>
-        #endif
-        #ifdef EIGEN_VECTORIZE_SSE4_2
-        #include <nmmintrin.h>
-        #endif
-        #ifdef EIGEN_VECTORIZE_AVX
-        #include <immintrin.h>
-        #endif
+      #include <emmintrin.h>
+      #include <xmmintrin.h>
+      #ifdef  EIGEN_VECTORIZE_SSE3
+      #include <pmmintrin.h>
+      #endif
+      #ifdef EIGEN_VECTORIZE_SSSE3
+      #include <tmmintrin.h>
+      #endif
+      #ifdef EIGEN_VECTORIZE_SSE4_1
+      #include <smmintrin.h>
+      #endif
+      #ifdef EIGEN_VECTORIZE_SSE4_2
+      #include <nmmintrin.h>
      #endif
    } // end extern "C"
-  #elif defined __VSX__
-    #define EIGEN_VECTORIZE
-    #define EIGEN_VECTORIZE_VSX
-    #include <altivec.h>
-    // We need to #undef all these ugly tokens defined in <altivec.h>
-    // => use __vector instead of vector
-    #undef bool
-    #undef vector
-    #undef pixel
  #elif defined __ALTIVEC__
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_ALTIVEC
@@ -190,18 +111,13 @@
    #undef bool
    #undef vector
    #undef pixel
-  #elif (defined  __ARM_NEON) || (defined __ARM_NEON__)
+  #elif defined  __ARM_NEON__
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_NEON
    #include <arm_neon.h>
  #endif
 #endif

-#if defined __CUDACC__
-  #define EIGEN_VECTORIZE_CUDA
-  #include <vector_types.h>
-#endif
-
 #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
  #define EIGEN_HAS_OPENMP
 #endif
@@ -211,7 +127,7 @@
 #endif

 // MSVC for windows mobile does not have the errno.h file
-#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
+#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION)
 #define EIGEN_HAS_ERRNO
 #endif

@@ -237,17 +153,23 @@
 #endif

 // required for __cpuid, needs to be included after cmath
-#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
+#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64))
  #include <intrin.h>
 #endif

+#if defined(_CPPUNWIND) || defined(__EXCEPTIONS)
+  #define EIGEN_EXCEPTIONS
+#endif
+
+#ifdef EIGEN_EXCEPTIONS
+  #include <new>
+#endif
+
 /** \brief Namespace containing all symbols from the %Eigen library. */
 namespace Eigen {

 inline static const char *SimdInstructionSetsInUse(void) {
-#if defined(EIGEN_VECTORIZE_AVX)
-  return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
-#elif defined(EIGEN_VECTORIZE_SSE4_2)
+#if defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
@@ -259,8 +181,6 @@ inline static const char *SimdInstructionSetsInUse(void) {
  return "SSE, SSE2";
 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
-#elif defined(EIGEN_VECTORIZE_VSX)
-  return "VSX";
 #elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
 #else
@@ -270,9 +190,34 @@ inline static const char *SimdInstructionSetsInUse(void) {

 } // end namespace Eigen

-#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
-// This will generate an error message:
-#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
+#define STAGE10_FULL_EIGEN2_API             10
+#define STAGE20_RESOLVE_API_CONFLICTS       20
+#define STAGE30_FULL_EIGEN3_API             30
+#define STAGE40_FULL_EIGEN3_STRICTNESS      40
+#define STAGE99_NO_EIGEN2_SUPPORT           99
+
+#if   defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS
+  #define EIGEN2_SUPPORT
+  #define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS
+#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
+  #define EIGEN2_SUPPORT
+  #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
+#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS
+  #define EIGEN2_SUPPORT
+  #define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS
+#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API
+  #define EIGEN2_SUPPORT
+  #define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API
+#elif defined EIGEN2_SUPPORT
+  // default to stage 3, that's what it's always meant
+  #define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API
+  #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API
+#else
+  #define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT
+#endif
+
+#ifdef EIGEN2_SUPPORT
+#undef minor
 #endif

 // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
@@ -291,75 +236,50 @@ using std::ptrdiff_t;
  * \endcode
  */

+/** \defgroup Support_modules Support modules [category]
+  * Category of modules which add support for external libraries.
+  */
+
 #include "src/Core/util/Constants.h"
-#include "src/Core/util/Meta.h"
 #include "src/Core/util/ForwardDeclarations.h"
-#include "src/Core/util/StaticAssert.h"
+#include "src/Core/util/Meta.h"
 #include "src/Core/util/XprHelper.h"
+#include "src/Core/util/StaticAssert.h"
 #include "src/Core/util/Memory.h"

 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
 #include "src/Core/GenericPacketMath.h"

-#if defined EIGEN_VECTORIZE_AVX
-  // Use AVX for floats and doubles, SSE for integers
-  #include "src/Core/arch/SSE/PacketMath.h"
-  #include "src/Core/arch/SSE/Complex.h"
-  #include "src/Core/arch/SSE/MathFunctions.h"
-  #include "src/Core/arch/AVX/PacketMath.h"
-  #include "src/Core/arch/AVX/MathFunctions.h"
-  #include "src/Core/arch/AVX/Complex.h"
-  #include "src/Core/arch/AVX/TypeCasting.h"
-#elif defined EIGEN_VECTORIZE_SSE
+#if defined EIGEN_VECTORIZE_SSE
  #include "src/Core/arch/SSE/PacketMath.h"
  #include "src/Core/arch/SSE/MathFunctions.h"
  #include "src/Core/arch/SSE/Complex.h"
-  #include "src/Core/arch/SSE/TypeCasting.h"
-#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
+#elif defined EIGEN_VECTORIZE_ALTIVEC
  #include "src/Core/arch/AltiVec/PacketMath.h"
-  #include "src/Core/arch/AltiVec/MathFunctions.h"
  #include "src/Core/arch/AltiVec/Complex.h"
 #elif defined EIGEN_VECTORIZE_NEON
  #include "src/Core/arch/NEON/PacketMath.h"
-  #include "src/Core/arch/NEON/MathFunctions.h"
  #include "src/Core/arch/NEON/Complex.h"
 #endif

-#if defined EIGEN_VECTORIZE_CUDA
-  #include "src/Core/arch/CUDA/PacketMath.h"
-  #include "src/Core/arch/CUDA/MathFunctions.h"
-#endif
-
 #include "src/Core/arch/Default/Settings.h"

-#include "src/Core/functors/BinaryFunctors.h"
-#include "src/Core/functors/UnaryFunctors.h"
-#include "src/Core/functors/NullaryFunctors.h"
-#include "src/Core/functors/StlFunctors.h"
-#include "src/Core/functors/AssignmentFunctors.h"
-
+#include "src/Core/Functors.h"
 #include "src/Core/DenseCoeffsBase.h"
 #include "src/Core/DenseBase.h"
 #include "src/Core/MatrixBase.h"
 #include "src/Core/EigenBase.h"

-#include "src/Core/Product.h"
-#include "src/Core/CoreEvaluators.h"
-#include "src/Core/AssignEvaluator.h"
-
 #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
                                // at least confirmed with Doxygen 1.5.5 and 1.5.6
  #include "src/Core/Assign.h"
 #endif

-#include "src/Core/ArrayBase.h"
 #include "src/Core/util/BlasUtil.h"
 #include "src/Core/DenseStorage.h"
 #include "src/Core/NestByValue.h"
-
-// #include "src/Core/ForceAlignedAccess.h"
-
+#include "src/Core/ForceAlignedAccess.h"
 #include "src/Core/ReturnByValue.h"
 #include "src/Core/NoAlias.h"
 #include "src/Core/PlainObjectBase.h"
@@ -372,10 +292,9 @@ using std::ptrdiff_t;
 #include "src/Core/SelfCwiseBinaryOp.h"
 #include "src/Core/Dot.h"
 #include "src/Core/StableNorm.h"
-#include "src/Core/Stride.h"
 #include "src/Core/MapBase.h"
+#include "src/Core/Stride.h"
 #include "src/Core/Map.h"
-#include "src/Core/Ref.h"
 #include "src/Core/Block.h"
 #include "src/Core/VectorBlock.h"
 #include "src/Core/Transpose.h"
@@ -390,14 +309,14 @@ using std::ptrdiff_t;
 #include "src/Core/IO.h"
 #include "src/Core/Swap.h"
 #include "src/Core/CommaInitializer.h"
+#include "src/Core/Flagged.h"
+#include "src/Core/ProductBase.h"
 #include "src/Core/GeneralProduct.h"
-#include "src/Core/Solve.h"
-#include "src/Core/Inverse.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
 #include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/Parallelizer.h"
-#include "src/Core/ProductEvaluators.h"
+#include "src/Core/products/CoeffBasedProduct.h"
 #include "src/Core/products/GeneralMatrixVector.h"
 #include "src/Core/products/GeneralMatrixMatrix.h"
 #include "src/Core/SolveTriangular.h"
@@ -411,7 +330,6 @@ using std::ptrdiff_t;
 #include "src/Core/products/TriangularSolverMatrix.h"
 #include "src/Core/products/TriangularSolverVector.h"
 #include "src/Core/BandMatrix.h"
-#include "src/Core/CoreIterators.h"

 #include "src/Core/BooleanRedux.h"
 #include "src/Core/Select.h"
@@ -419,6 +337,7 @@ using std::ptrdiff_t;
 #include "src/Core/Random.h"
 #include "src/Core/Replicate.h"
 #include "src/Core/Reverse.h"
+#include "src/Core/ArrayBase.h"
 #include "src/Core/ArrayWrapper.h"

 #ifdef EIGEN_USE_BLAS
@@ -440,4 +359,8 @@ using std::ptrdiff_t;

 #include "src/Core/util/ReenableStupidWarnings.h"

+#ifdef EIGEN2_SUPPORT
+#include "Eigen2Support"
+#endif
+
 #endif // EIGEN_CORE_H
--- a/Eigen/Eigen
+++ b/Eigen/Eigen
@@ -1,2 +1,2 @@
 #include "Dense"
-#include "Sparse"
+//#include "Sparse"
--- a/Eigen/Eigen2Support
+++ b/Eigen/Eigen2Support
@@ -0,0 +1,82 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN2SUPPORT_H
+#define EIGEN2SUPPORT_H
+
+#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H))
+#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header
+#endif
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \ingroup Support_modules
+  * \defgroup Eigen2Support_Module Eigen2 support module
+  * This module provides a couple of deprecated functions improving the compatibility with Eigen2.
+  *
+  * To use it, define EIGEN2_SUPPORT before including any Eigen header
+  * \code
+  * #define EIGEN2_SUPPORT
+  * \endcode
+  *
+  */
+
+#include "src/Eigen2Support/Macros.h"
+#include "src/Eigen2Support/Memory.h"
+#include "src/Eigen2Support/Meta.h"
+#include "src/Eigen2Support/Lazy.h"
+#include "src/Eigen2Support/Cwise.h"
+#include "src/Eigen2Support/CwiseOperators.h"
+#include "src/Eigen2Support/TriangularSolver.h"
+#include "src/Eigen2Support/Block.h"
+#include "src/Eigen2Support/VectorBlock.h"
+#include "src/Eigen2Support/Minor.h"
+#include "src/Eigen2Support/MathFunctions.h"
+
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+// Eigen2 used to include iostream
+#include<iostream>
+
+#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \
+using Eigen::Vector##SizeSuffix##TypeSuffix; \
+using Eigen::RowVector##SizeSuffix##TypeSuffix;
+
+#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
+
+#define EIGEN_USING_MATRIX_TYPEDEFS \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \
+EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd)
+
+#define USING_PART_OF_NAMESPACE_EIGEN \
+EIGEN_USING_MATRIX_TYPEDEFS \
+using Eigen::Matrix; \
+using Eigen::MatrixBase; \
+using Eigen::ei_random; \
+using Eigen::ei_real; \
+using Eigen::ei_imag; \
+using Eigen::ei_conj; \
+using Eigen::ei_abs; \
+using Eigen::ei_abs2; \
+using Eigen::ei_sqrt; \
+using Eigen::ei_exp; \
+using Eigen::ei_log; \
+using Eigen::ei_sin; \
+using Eigen::ei_cos;
+
+#endif // EIGEN2SUPPORT_H
--- a/Eigen/Eigenvalues
+++ b/Eigen/Eigenvalues
@@ -33,8 +33,6 @@
 #include "src/Eigenvalues/HessenbergDecomposition.h"
 #include "src/Eigenvalues/ComplexSchur.h"
 #include "src/Eigenvalues/ComplexEigenSolver.h"
-#include "src/Eigenvalues/RealQZ.h"
-#include "src/Eigenvalues/GeneralizedEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
 #ifdef EIGEN_USE_LAPACKE
 #include "src/Eigenvalues/RealSchur_MKL.h"
--- a/Eigen/Geometry
+++ b/Eigen/Geometry
@@ -9,6 +9,10 @@
 #include "LU"
 #include <limits>

+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
 /** \defgroup Geometry_Module Geometry module
  *
  *
@@ -29,23 +33,27 @@
 #include "src/Geometry/OrthoMethods.h"
 #include "src/Geometry/EulerAngles.h"

-#include "src/Geometry/Homogeneous.h"
-#include "src/Geometry/RotationBase.h"
-#include "src/Geometry/Rotation2D.h"
-#include "src/Geometry/Quaternion.h"
-#include "src/Geometry/AngleAxis.h"
-#include "src/Geometry/Transform.h"
-#include "src/Geometry/Translation.h"
-#include "src/Geometry/Scaling.h"
-#include "src/Geometry/Hyperplane.h"
-#include "src/Geometry/ParametrizedLine.h"
-#include "src/Geometry/AlignedBox.h"
-#include "src/Geometry/Umeyama.h"
+#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+  #include "src/Geometry/Homogeneous.h"
+  #include "src/Geometry/RotationBase.h"
+  #include "src/Geometry/Rotation2D.h"
+  #include "src/Geometry/Quaternion.h"
+  #include "src/Geometry/AngleAxis.h"
+  #include "src/Geometry/Transform.h"
+  #include "src/Geometry/Translation.h"
+  #include "src/Geometry/Scaling.h"
+  #include "src/Geometry/Hyperplane.h"
+  #include "src/Geometry/ParametrizedLine.h"
+  #include "src/Geometry/AlignedBox.h"
+  #include "src/Geometry/Umeyama.h"

-// Use the SSE optimized version whenever possible. At the moment the
-// SSE version doesn't compile when AVX is enabled
-#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
-#include "src/Geometry/arch/Geometry_SSE.h"
+  #if defined EIGEN_VECTORIZE_SSE
+    #include "src/Geometry/arch/Geometry_SSE.h"
+  #endif
+#endif
+
+#ifdef EIGEN2_SUPPORT
+#include "src/Eigen2Support/Geometry/All.h"
 #endif

 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/Eigen/IterativeLinearSolvers
+++ b/Eigen/IterativeLinearSolvers
@@ -6,32 +6,32 @@

 #include "src/Core/util/DisableStupidWarnings.h"

-/** 
+/** \ingroup Sparse_modules
  * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module
  *
  * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse.
  * Those solvers are accessible via the following classes:
  *  - ConjugateGradient for selfadjoint (hermitian) matrices,
-  *  - LeastSquaresConjugateGradient for rectangular least-square problems,
  *  - BiCGSTAB for general square matrices.
  *
  * These iterative solvers are associated with some preconditioners:
  *  - IdentityPreconditioner - not really useful
-  *  - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices.
-  *  - IncompleteLUT - incomplete LU factorization with dual thresholding
+  *  - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices.
+  *  - IncompleteILUT - incomplete LU factorization with dual thresholding
  *
  * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
  *
-    \code
-    #include <Eigen/IterativeLinearSolvers>
-    \endcode
+  * \code
+  * #include <Eigen/IterativeLinearSolvers>
+  * \endcode
  */

-#include "src/IterativeLinearSolvers/SolveWithGuess.h"
+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
 #include "src/IterativeLinearSolvers/IterativeSolverBase.h"
 #include "src/IterativeLinearSolvers/BasicPreconditioners.h"
 #include "src/IterativeLinearSolvers/ConjugateGradient.h"
-#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h"
 #include "src/IterativeLinearSolvers/BiCGSTAB.h"
 #include "src/IterativeLinearSolvers/IncompleteLUT.h"

--- a/Eigen/LU
+++ b/Eigen/LU
@@ -16,6 +16,7 @@
  * \endcode
  */

+#include "src/misc/Solve.h"
 #include "src/misc/Kernel.h"
 #include "src/misc/Image.h"
 #include "src/LU/FullPivLU.h"
@@ -24,14 +25,16 @@
 #include "src/LU/PartialPivLU_MKL.h"
 #endif
 #include "src/LU/Determinant.h"
-#include "src/LU/InverseImpl.h"
+#include "src/LU/Inverse.h"

-// Use the SSE optimized version whenever possible. At the moment the
-// SSE version doesn't compile when AVX is enabled
-#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
+#if defined EIGEN_VECTORIZE_SSE
  #include "src/LU/arch/Inverse_SSE.h"
 #endif

+#ifdef EIGEN2_SUPPORT
+  #include "src/Eigen2Support/LU.h"
+#endif
+
 #include "src/Core/util/ReenableStupidWarnings.h"

 #endif // EIGEN_LU_MODULE_H
--- a/Eigen/LeastSquares
+++ b/Eigen/LeastSquares
@@ -0,0 +1,32 @@
+#ifndef EIGEN_REGRESSION_MODULE_H
+#define EIGEN_REGRESSION_MODULE_H
+
+#ifndef EIGEN2_SUPPORT
+#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT)
+#endif
+
+// exclude from normal eigen3-only documentation
+#ifdef EIGEN2_SUPPORT
+
+#include "Core"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+#include "Eigenvalues"
+#include "Geometry"
+
+/** \defgroup LeastSquares_Module LeastSquares module
+  * This module provides linear regression and related features.
+  *
+  * \code
+  * #include <Eigen/LeastSquares>
+  * \endcode
+  */
+
+#include "src/Eigen2Support/LeastSquares.h"
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN2_SUPPORT
+
+#endif // EIGEN_REGRESSION_MODULE_H
--- a/Eigen/MetisSupport
+++ b/Eigen/MetisSupport
@@ -1,28 +0,0 @@
-#ifndef EIGEN_METISSUPPORT_MODULE_H
-#define EIGEN_METISSUPPORT_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-extern "C" {
-#include <metis.h>
-}
-
-
-/** \ingroup Support_modules
-  * \defgroup MetisSupport_Module MetisSupport module
-  *
-  * \code
-  * #include <Eigen/MetisSupport>
-  * \endcode
-  * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). 
-  * It can be used just as any other built-in method as explained in \link OrderingMethods_Module here. \endlink
-  */
-
-
-#include "src/MetisSupport/MetisSupport.h"
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_METISSUPPORT_MODULE_H
--- a/Eigen/OrderingMethods
+++ b/Eigen/OrderingMethods
@@ -5,62 +5,19 @@

 #include "src/Core/util/DisableStupidWarnings.h"

-/** 
+/** \ingroup Sparse_modules
  * \defgroup OrderingMethods_Module OrderingMethods module
  *
-  * This module is currently for internal use only
-  * 
-  * It defines various built-in and external ordering methods for sparse matrices. 
-  * They are typically used to reduce the number of elements during 
-  * the sparse matrix decomposition (LLT, LU, QR).
-  * Precisely, in a preprocessing step, a permutation matrix P is computed using 
-  * those ordering methods and applied to the columns of the matrix. 
-  * Using for instance the sparse Cholesky decomposition, it is expected that 
-  * the nonzeros elements in LLT(A*P) will be much smaller than that in LLT(A).
-  * 
-  * 
-  * Usage : 
+  * This module is currently for internal use only.
+  *
+  *
  * \code
  * #include <Eigen/OrderingMethods>
  * \endcode
-  * 
-  * A simple usage is as a template parameter in the sparse decomposition classes : 
-  * 
-  * \code 
-  * SparseLU<MatrixType, COLAMDOrdering<int> > solver;
-  * \endcode 
-  * 
-  * \code 
-  * SparseQR<MatrixType, COLAMDOrdering<int> > solver;
-  * \endcode
-  * 
-  * It is possible as well to call directly a particular ordering method for your own purpose, 
-  * \code 
-  * AMDOrdering<int> ordering;
-  * PermutationMatrix<Dynamic, Dynamic, int> perm;
-  * SparseMatrix<double> A; 
-  * //Fill the matrix ...
-  * 
-  * ordering(A, perm); // Call AMD
-  * \endcode
-  * 
-  * \note Some of these methods (like AMD or METIS), need the sparsity pattern 
-  * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, 
-  * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method.
-  * If your matrix is already symmetric (at leat in structure), you can avoid that
-  * by calling the method with a SelfAdjointView type.
-  * 
-  * \code
-  *  // Call the ordering on the pattern of the lower triangular matrix A
-  * ordering(A.selfadjointView<Lower>(), perm);
-  * \endcode
  */

-#ifndef EIGEN_MPL2_ONLY
 #include "src/OrderingMethods/Amd.h"
-#endif

-#include "src/OrderingMethods/Ordering.h"
 #include "src/Core/util/ReenableStupidWarnings.h"

 #endif // EIGEN_ORDERINGMETHODS_MODULE_H
--- a/Eigen/PaStiXSupport
+++ b/Eigen/PaStiXSupport
@@ -35,8 +35,12 @@ extern "C" {
  *
  */

+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
 #include "src/PaStiXSupport/PaStiXSupport.h"

+
 #include "src/Core/util/ReenableStupidWarnings.h"

 #endif // EIGEN_PASTIXSUPPORT_MODULE_H
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -15,15 +15,14 @@
  *
  * This module provides various QR decompositions
  * This module also provides some MatrixBase methods, including:
-  *  - MatrixBase::householderQr()
-  *  - MatrixBase::colPivHouseholderQr()
-  *  - MatrixBase::fullPivHouseholderQr()
+  *  - MatrixBase::qr(),
  *
  * \code
  * #include <Eigen/QR>
  * \endcode
  */

+#include "src/misc/Solve.h"
 #include "src/QR/HouseholderQR.h"
 #include "src/QR/FullPivHouseholderQR.h"
 #include "src/QR/ColPivHouseholderQR.h"
@@ -32,7 +31,15 @@
 #include "src/QR/ColPivHouseholderQR_MKL.h"
 #endif

+#ifdef EIGEN2_SUPPORT
+#include "src/Eigen2Support/QR.h"
+#endif
+
 #include "src/Core/util/ReenableStupidWarnings.h"

+#ifdef EIGEN2_SUPPORT
+#include "Eigenvalues"
+#endif
+
 #endif // EIGEN_QR_MODULE_H
 /* vim: set filetype=cpp et sw=2 ts=2 ai: */
--- a/Eigen/SPQRSupport
+++ b/Eigen/SPQRSupport
@@ -1,27 +0,0 @@
-#ifndef EIGEN_SPQRSUPPORT_MODULE_H
-#define EIGEN_SPQRSUPPORT_MODULE_H
-
-#include "SparseCore"
-
-#include "src/Core/util/DisableStupidWarnings.h"
-
-#include "SuiteSparseQR.hpp"
-
-/** \ingroup Support_modules
-  * \defgroup SPQRSupport_Module SuiteSparseQR module
-  * 
-  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
-  *
-  * \code
-  * #include <Eigen/SPQRSupport>
-  * \endcode
-  *
-  * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...).
-  * For a cmake based project, you can use our FindSPQR.cmake and FindCholmod.Cmake modules
-  *
-  */
-
-#include "src/CholmodSupport/CholmodSupport.h"
-#include "src/SPQRSupport/SuiteSparseQRSupport.h"
-
-#endif
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -12,25 +12,24 @@
  *
  *
  * This module provides SVD decomposition for matrices (both real and complex).
-  * Two decomposition algorithms are provided:
-  *  - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones.
-  *  - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems.
-  * These decompositions are accessible via the respective classes and following MatrixBase methods:
+  * This decomposition is accessible via the following MatrixBase method:
  *  - MatrixBase::jacobiSvd()
-  *  - MatrixBase::bdcSvd()
  *
  * \code
  * #include <Eigen/SVD>
  * \endcode
  */

-#include "src/SVD/UpperBidiagonalization.h"
-#include "src/SVD/SVDBase.h"
+#include "src/misc/Solve.h"
 #include "src/SVD/JacobiSVD.h"
-#include "src/SVD/BDCSVD.h"
 #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
 #include "src/SVD/JacobiSVD_MKL.h"
 #endif
+#include "src/SVD/UpperBidiagonalization.h"
+
+#ifdef EIGEN2_SUPPORT
+#include "src/Eigen2Support/SVD.h"
+#endif

 #include "src/Core/util/ReenableStupidWarnings.h"

--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -1,26 +1,22 @@
 #ifndef EIGEN_SPARSE_MODULE_H
 #define EIGEN_SPARSE_MODULE_H

-/** \defgroup Sparse_Module Sparse meta-module
+/** \defgroup Sparse_modules Sparse modules
  *
  * Meta-module including all related modules:
-  * - \ref SparseCore_Module
-  * - \ref OrderingMethods_Module
-  * - \ref SparseCholesky_Module
-  * - \ref SparseLU_Module
-  * - \ref SparseQR_Module
-  * - \ref IterativeLinearSolvers_Module
+  * - SparseCore
+  * - OrderingMethods
+  * - SparseCholesky
+  * - IterativeLinearSolvers
  *
-    \code
-    #include <Eigen/Sparse>
-    \endcode
+  * \code
+  * #include <Eigen/Sparse>
+  * \endcode
  */

 #include "SparseCore"
 #include "OrderingMethods"
 #include "SparseCholesky"
-#include "SparseLU"
-#include "SparseQR"
 #include "IterativeLinearSolvers"

 #endif // EIGEN_SPARSE_MODULE_H
--- a/Eigen/SparseCholesky
+++ b/Eigen/SparseCholesky
@@ -1,21 +1,11 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
 #ifndef EIGEN_SPARSECHOLESKY_MODULE_H
 #define EIGEN_SPARSECHOLESKY_MODULE_H

 #include "SparseCore"
-#include "OrderingMethods"

 #include "src/Core/util/DisableStupidWarnings.h"

-/** 
+/** \ingroup Sparse_modules
  * \defgroup SparseCholesky_Module SparseCholesky module
  *
  * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices.
@@ -30,16 +20,11 @@
  * \endcode
  */

-#ifdef EIGEN_MPL2_ONLY
-#error The SparseCholesky module has nothing to offer in MPL2 only mode
-#endif
+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"

 #include "src/SparseCholesky/SimplicialCholesky.h"

-#ifndef EIGEN_MPL2_ONLY
-#include "src/SparseCholesky/SimplicialCholesky_impl.h"
-#endif
-
 #include "src/Core/util/ReenableStupidWarnings.h"

 #endif // EIGEN_SPARSECHOLESKY_MODULE_H
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -11,7 +11,7 @@
 #include <cstring>
 #include <algorithm>

-/** 
+/** \ingroup Sparse_modules
  * \defgroup SparseCore_Module SparseCore module
  *
  * This module provides a sparse matrix representation, and basic associatd matrix manipulations
@@ -26,35 +26,39 @@
  * This module depends on: Core.
  */

+namespace Eigen {
+
+/** The type used to identify a general sparse storage. */
+struct Sparse {};
+
+}
+
 #include "src/SparseCore/SparseUtil.h"
 #include "src/SparseCore/SparseMatrixBase.h"
-#include "src/SparseCore/SparseAssign.h"
 #include "src/SparseCore/CompressedStorage.h"
 #include "src/SparseCore/AmbiVector.h"
-#include "src/SparseCore/SparseCompressedBase.h"
 #include "src/SparseCore/SparseMatrix.h"
-#include "src/SparseCore/SparseMap.h"
 #include "src/SparseCore/MappedSparseMatrix.h"
 #include "src/SparseCore/SparseVector.h"
-#include "src/SparseCore/SparseRef.h"
+#include "src/SparseCore/CoreIterators.h"
+#include "src/SparseCore/SparseBlock.h"
+#include "src/SparseCore/SparseTranspose.h"
 #include "src/SparseCore/SparseCwiseUnaryOp.h"
 #include "src/SparseCore/SparseCwiseBinaryOp.h"
-#include "src/SparseCore/SparseTranspose.h"
-#include "src/SparseCore/SparseBlock.h"
 #include "src/SparseCore/SparseDot.h"
+#include "src/SparseCore/SparsePermutation.h"
+#include "src/SparseCore/SparseAssign.h"
 #include "src/SparseCore/SparseRedux.h"
-#include "src/SparseCore/SparseView.h"
-#include "src/SparseCore/SparseDiagonalProduct.h"
+#include "src/SparseCore/SparseFuzzy.h"
 #include "src/SparseCore/ConservativeSparseSparseProduct.h"
 #include "src/SparseCore/SparseSparseProductWithPruning.h"
 #include "src/SparseCore/SparseProduct.h"
 #include "src/SparseCore/SparseDenseProduct.h"
-#include "src/SparseCore/SparseSelfAdjointView.h"
+#include "src/SparseCore/SparseDiagonalProduct.h"
 #include "src/SparseCore/SparseTriangularView.h"
+#include "src/SparseCore/SparseSelfAdjointView.h"
 #include "src/SparseCore/TriangularSolver.h"
-#include "src/SparseCore/SparsePermutation.h"
-#include "src/SparseCore/SparseFuzzy.h"
-#include "src/SparseCore/SparseSolverBase.h"
+#include "src/SparseCore/SparseView.h"

 #include "src/Core/util/ReenableStupidWarnings.h"

--- a/Eigen/SparseLU
+++ b/Eigen/SparseLU
@@ -1,46 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_SPARSELU_MODULE_H
-#define EIGEN_SPARSELU_MODULE_H
-
-#include "SparseCore"
-
-/** 
-  * \defgroup SparseLU_Module SparseLU module
-  * This module defines a supernodal factorization of general sparse matrices.
-  * The code is fully optimized for supernode-panel updates with specialized kernels.
-  * Please, see the documentation of the SparseLU class for more details.
-  */
-
-// Ordering interface
-#include "OrderingMethods"
-
-#include "src/SparseLU/SparseLU_gemm_kernel.h"
-
-#include "src/SparseLU/SparseLU_Structs.h"
-#include "src/SparseLU/SparseLU_SupernodalMatrix.h"
-#include "src/SparseLU/SparseLUImpl.h"
-#include "src/SparseCore/SparseColEtree.h"
-#include "src/SparseLU/SparseLU_Memory.h"
-#include "src/SparseLU/SparseLU_heap_relax_snode.h"
-#include "src/SparseLU/SparseLU_relax_snode.h"
-#include "src/SparseLU/SparseLU_pivotL.h"
-#include "src/SparseLU/SparseLU_panel_dfs.h"
-#include "src/SparseLU/SparseLU_kernel_bmod.h"
-#include "src/SparseLU/SparseLU_panel_bmod.h"
-#include "src/SparseLU/SparseLU_column_dfs.h"
-#include "src/SparseLU/SparseLU_column_bmod.h"
-#include "src/SparseLU/SparseLU_copy_to_ucol.h"
-#include "src/SparseLU/SparseLU_pruneL.h"
-#include "src/SparseLU/SparseLU_Utils.h"
-#include "src/SparseLU/SparseLU.h"
-
-#endif // EIGEN_SPARSELU_MODULE_H
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -1,30 +0,0 @@
-#ifndef EIGEN_SPARSEQR_MODULE_H
-#define EIGEN_SPARSEQR_MODULE_H
-
-#include "SparseCore"
-#include "OrderingMethods"
-#include "src/Core/util/DisableStupidWarnings.h"
-
-/** \defgroup SparseQR_Module SparseQR module
-  * \brief Provides QR decomposition for sparse matrices
-  * 
-  * This module provides a simplicial version of the left-looking Sparse QR decomposition. 
-  * The columns of the input matrix should be reordered to limit the fill-in during the 
-  * decomposition. Built-in methods (COLAMD, AMD) or external  methods (METIS) can be used to this end.
-  * See the \link OrderingMethods_Module OrderingMethods\endlink module for the list 
-  * of built-in and external ordering methods.
-  * 
-  * \code
-  * #include <Eigen/SparseQR>
-  * \endcode
-  * 
-  * 
-  */
-
-#include "OrderingMethods"
-#include "src/SparseCore/SparseColEtree.h"
-#include "src/SparseQR/SparseQR.h"
-
-#include "src/Core/util/ReenableStupidWarnings.h"
-
-#endif
--- a/Eigen/StdDeque
+++ b/Eigen/StdDeque
@@ -14,7 +14,7 @@
 #include "Core"
 #include <deque>

-#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
+#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */

 #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...)

--- a/Eigen/StdList
+++ b/Eigen/StdList
@@ -13,7 +13,7 @@
 #include "Core"
 #include <list>

-#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */    
+#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */    

 #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...)

--- a/Eigen/StdVector
+++ b/Eigen/StdVector
@@ -14,7 +14,7 @@
 #include "Core"
 #include <vector>

-#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */
+#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */

 #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...)

--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -48,8 +48,12 @@ namespace Eigen { struct SluMatrix; }
  *
  */

+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
 #include "src/SuperLUSupport/SuperLUSupport.h"

+
 #include "src/Core/util/ReenableStupidWarnings.h"

 #endif // EIGEN_SUPERLUSUPPORT_MODULE_H
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -26,6 +26,9 @@ extern "C" {
  *
  */

+#include "src/misc/Solve.h"
+#include "src/misc/SparseSolve.h"
+
 #include "src/UmfPackSupport/UmfPackSupport.h"

 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -16,10 +16,7 @@
 namespace Eigen { 

 namespace internal {
-  template<typename MatrixType, int UpLo> struct LDLT_Traits;
-
-  // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef
-  enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };
+template<typename MatrixType, int UpLo> struct LDLT_Traits;
 }

 /** \ingroup Cholesky_Module
@@ -43,7 +40,7 @@ namespace internal {
  * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
  * decomposition to determine whether a system of equations has a solution.
  *
-  * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
+  * \sa MatrixBase::ldlt(), class LLT
  */
 template<typename _MatrixType, int _UpLo> class LDLT
 {
@@ -59,8 +56,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
    };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
-    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef typename MatrixType::Index Index;
    typedef Matrix<Scalar, RowsAtCompileTime, 1, Options, MaxRowsAtCompileTime, 1> TmpMatrixType;

    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
@@ -73,12 +69,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * The default constructor is useful in cases in which the user intends to
      * perform decompositions via LDLT::compute(const MatrixType&).
      */
-    LDLT() 
-      : m_matrix(), 
-        m_transpositions(), 
-        m_sign(internal::ZeroSign),
-        m_isInitialized(false) 
-    {}
+    LDLT() : m_matrix(), m_transpositions(), m_isInitialized(false) {}

    /** \brief Default Constructor with memory preallocation
      *
@@ -86,11 +77,10 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * according to the specified problem \a size.
      * \sa LDLT()
      */
-    explicit LDLT(Index size)
+    LDLT(Index size)
      : m_matrix(size, size),
        m_transpositions(size),
        m_temporary(size),
-        m_sign(internal::ZeroSign),
        m_isInitialized(false)
    {}

@@ -99,11 +89,10 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * This calculates the decomposition for the input \a matrix.
      * \sa LDLT(Index size)
      */
-    explicit LDLT(const MatrixType& matrix)
+    LDLT(const MatrixType& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
        m_transpositions(matrix.rows()),
        m_temporary(matrix.rows()),
-        m_sign(internal::ZeroSign),
        m_isInitialized(false)
    {
      compute(matrix);
@@ -150,14 +139,21 @@ template<typename _MatrixType, int _UpLo> class LDLT
    inline bool isPositive() const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
+      return m_sign == 1;
    }
+    
+    #ifdef EIGEN2_SUPPORT
+    inline bool isPositiveDefinite() const
+    {
+      return isPositive();
+    }
+    #endif

    /** \returns true if the matrix is negative (semidefinite) */
    inline bool isNegative(void) const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;
+      return m_sign == -1;
    }

    /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
@@ -173,25 +169,34 @@ template<typename _MatrixType, int _UpLo> class LDLT
      * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
      * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
      *
-      * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
+      * \sa MatrixBase::ldlt()
      */
    template<typename Rhs>
-    inline const Solve<LDLT, Rhs>
+    inline const internal::solve_retval<LDLT, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      eigen_assert(m_matrix.rows()==b.rows()
                && "LDLT::solve(): invalid number of rows of the right hand side matrix b");
-      return Solve<LDLT, Rhs>(*this, b.derived());
+      return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
    }

+    #ifdef EIGEN2_SUPPORT
+    template<typename OtherDerived, typename ResultType>
+    bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
+    {
+      *result = this->solve(b);
+      return true;
+    }
+    #endif
+
    template<typename Derived>
    bool solveInPlace(MatrixBase<Derived> &bAndX) const;

    LDLT& compute(const MatrixType& matrix);

    template <typename Derived>
-    LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
+    LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);

    /** \returns the internal LDLT decomposition matrix
      *
@@ -218,19 +223,8 @@ template<typename _MatrixType, int _UpLo> class LDLT
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      return Success;
    }
-    
-    #ifndef EIGEN_PARSED_BY_DOXYGEN
-    template<typename RhsType, typename DstType>
-    EIGEN_DEVICE_FUNC
-    void _solve_impl(const RhsType &rhs, DstType &dst) const;
-    #endif

  protected:
-    
-    static void check_template_parameters()
-    {
-      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
-    }

    /** \internal
      * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@@ -241,7 +235,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
    MatrixType m_matrix;
    TranspositionType m_transpositions;
    TmpMatrixType m_temporary;
-    internal::SignMatrix m_sign;
+    int m_sign;
    bool m_isInitialized;
 };

@@ -252,32 +246,50 @@ template<int UpLo> struct ldlt_inplace;
 template<> struct ldlt_inplace<Lower>
 {
  template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
+  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
  {
-    using std::abs;
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
-    typedef typename TranspositionType::StorageIndex IndexType;
+    typedef typename MatrixType::Index Index;
    eigen_assert(mat.rows()==mat.cols());
    const Index size = mat.rows();

    if (size <= 1)
    {
      transpositions.setIdentity();
-      if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
-      else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
-      else sign = ZeroSign;
+      if(sign)
+        *sign = real(mat.coeff(0,0))>0 ? 1:-1;
      return true;
    }

+    RealScalar cutoff(0), biggest_in_corner;
+
    for (Index k = 0; k < size; ++k)
    {
      // Find largest diagonal element
      Index index_of_biggest_in_corner;
-      mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
+      biggest_in_corner = mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
      index_of_biggest_in_corner += k;

-      transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);
+      if(k == 0)
+      {
+        // The biggest overall is the point of reference to which further diagonals
+        // are compared; if any diagonal is negligible compared
+        // to the largest overall, the algorithm bails.
+        cutoff = abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
+
+        if(sign)
+          *sign = real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
+      }
+
+      // Finish early if the matrix is not full rank.
+      if(biggest_in_corner < cutoff)
+      {
+        for(Index i = k; i < size; i++) transpositions.coeffRef(i) = i;
+        break;
+      }
+
+      transpositions.coeffRef(k) = index_of_biggest_in_corner;
      if(k != index_of_biggest_in_corner)
      {
        // apply the transposition while taking care to consider only
@@ -286,14 +298,14 @@ template<> struct ldlt_inplace<Lower>
        mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));
        mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));
        std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));
-        for(Index i=k+1;i<index_of_biggest_in_corner;++i)
+        for(int i=k+1;i<index_of_biggest_in_corner;++i)
        {
          Scalar tmp = mat.coeffRef(i,k);
-          mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));
-          mat.coeffRef(index_of_biggest_in_corner,i) = numext::conj(tmp);
+          mat.coeffRef(i,k) = conj(mat.coeffRef(index_of_biggest_in_corner,i));
+          mat.coeffRef(index_of_biggest_in_corner,i) = conj(tmp);
        }
        if(NumTraits<Scalar>::IsComplex)
-          mat.coeffRef(index_of_biggest_in_corner,k) = numext::conj(mat.coeff(index_of_biggest_in_corner,k));
+          mat.coeffRef(index_of_biggest_in_corner,k) = conj(mat.coeff(index_of_biggest_in_corner,k));
      }

      // partition the matrix:
@@ -307,28 +319,13 @@ template<> struct ldlt_inplace<Lower>

      if(k>0)
      {
-        temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint();
+        temp.head(k) = mat.diagonal().head(k).asDiagonal() * A10.adjoint();
        mat.coeffRef(k,k) -= (A10 * temp.head(k)).value();
        if(rs>0)
          A21.noalias() -= A20 * temp.head(k);
      }
-      
-      // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
-      // was smaller than the cutoff value. However, since LDLT is not rank-revealing
-      // we should only make sure that we do not introduce INF or NaN values.
-      // Remark that LAPACK also uses 0 as the cutoff value.
-      RealScalar realAkk = numext::real(mat.coeffRef(k,k));
-      if((rs>0) && (abs(realAkk) > RealScalar(0)))
-        A21 /= realAkk;
-
-      if (sign == PositiveSemiDef) {
-        if (realAkk < 0) sign = Indefinite;
-      } else if (sign == NegativeSemiDef) {
-        if (realAkk > 0) sign = Indefinite;
-      } else if (sign == ZeroSign) {
-        if (realAkk > 0) sign = PositiveSemiDef;
-        else if (realAkk < 0) sign = NegativeSemiDef;
-      }
+      if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff))
+        A21 /= mat.coeffRef(k,k);
    }

    return true;
@@ -342,11 +339,12 @@ template<> struct ldlt_inplace<Lower>
  // Here only rank-1 updates are implemented, to reduce the
  // requirement for intermediate storage and improve accuracy
  template<typename MatrixType, typename WDerived>
-  static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, const typename MatrixType::RealScalar& sigma=1)
+  static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
  {
-    using numext::isfinite;
+    using internal::isfinite;
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
+    typedef typename MatrixType::Index Index;

    const Index size = mat.rows();
    eigen_assert(mat.cols() == size && w.size()==size);
@@ -361,9 +359,9 @@ template<> struct ldlt_inplace<Lower>
        break;

      // Update the diagonal terms
-      RealScalar dj = numext::real(mat.coeff(j,j));
+      RealScalar dj = real(mat.coeff(j,j));
      Scalar wj = w.coeff(j);
-      RealScalar swj2 = sigma*numext::abs2(wj);
+      RealScalar swj2 = sigma*abs2(wj);
      RealScalar gamma = dj*alpha + swj2;

      mat.coeffRef(j,j) += swj2/alpha;
@@ -374,13 +372,13 @@ template<> struct ldlt_inplace<Lower>
      Index rs = size-j-1;
      w.tail(rs) -= wj * mat.col(j).tail(rs);
      if(gamma != 0)
-        mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs);
+        mat.col(j).tail(rs) += (sigma*conj(wj)/gamma)*w.tail(rs);
    }
    return true;
  }

  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
-  static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1)
+  static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
  {
    // Apply the permutation to the input w
    tmp = transpositions * w;
@@ -392,14 +390,14 @@ template<> struct ldlt_inplace<Lower>
 template<> struct ldlt_inplace<Upper>
 {
  template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
+  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
  {
    Transpose<MatrixType> matt(mat);
    return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
  }

  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
-  static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1)
+  static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
  {
    Transpose<MatrixType> matt(mat);
    return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
@@ -410,16 +408,16 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
 {
  typedef const TriangularView<const MatrixType, UnitLower> MatrixL;
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
-  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
+  static inline MatrixL getL(const MatrixType& m) { return m; }
+  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
 };

 template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
 {
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
  typedef const TriangularView<const MatrixType, UnitUpper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
-  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
+  static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
+  static inline MatrixU getU(const MatrixType& m) { return m; }
 };

 } // end namespace internal
@@ -429,8 +427,6 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
 template<typename MatrixType, int _UpLo>
 LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
 {
-  check_template_parameters();
-  
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();

@@ -439,9 +435,8 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  m_transpositions.resize(size);
  m_isInitialized = false;
  m_temporary.resize(size);
-  m_sign = internal::ZeroSign;

-  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
+  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);

  m_isInitialized = true;
  return *this;
@@ -454,9 +449,8 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
  */
 template<typename MatrixType, int _UpLo>
 template<typename Derived>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
 {
-  typedef typename TranspositionType::StorageIndex IndexType;
  const Index size = w.rows();
  if (m_isInitialized)
  {
@@ -468,9 +462,9 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
    m_matrix.setZero();
    m_transpositions.resize(size);
    for (Index i = 0; i < size; i++)
-      m_transpositions.coeffRef(i) = IndexType(i);
+      m_transpositions.coeffRef(i) = i;
    m_temporary.resize(size);
-    m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
+    m_sign = sigma>=0 ? 1 : -1;
    m_isInitialized = true;
  }

@@ -479,45 +473,48 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
  return *this;
 }

-#ifndef EIGEN_PARSED_BY_DOXYGEN
-template<typename _MatrixType, int _UpLo>
-template<typename RhsType, typename DstType>
-void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
+namespace internal {
+template<typename _MatrixType, int _UpLo, typename Rhs>
+struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
+  : solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs>
 {
-  eigen_assert(rhs.rows() == rows());
-  // dst = P b
-  dst = m_transpositions * rhs;
+  typedef LDLT<_MatrixType,_UpLo> LDLTType;
+  EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs)

-  // dst = L^-1 (P b)
-  matrixL().solveInPlace(dst);
-
-  // dst = D^-1 (L^-1 P b)
-  // more precisely, use pseudo-inverse of D (see bug 241)
-  using std::abs;
-  const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
-  // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
-  // as motivated by LAPACK's xGELSS:
-  // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
-  // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
-  // diagonal element is not well justified and leads to numerical issues in some cases.
-  // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
-  RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
-  
-  for (Index i = 0; i < vecD.size(); ++i)
+  template<typename Dest> void evalTo(Dest& dst) const
  {
-    if(abs(vecD(i)) > tolerance)
-      dst.row(i) /= vecD(i);
-    else
-      dst.row(i).setZero();
+    eigen_assert(rhs().rows() == dec().matrixLDLT().rows());
+    // dst = P b
+    dst = dec().transpositionsP() * rhs();
+
+    // dst = L^-1 (P b)
+    dec().matrixL().solveInPlace(dst);
+
+    // dst = D^-1 (L^-1 P b)
+    // more precisely, use pseudo-inverse of D (see bug 241)
+    using std::abs;
+    using std::max;
+    typedef typename LDLTType::MatrixType MatrixType;
+    typedef typename LDLTType::Scalar Scalar;
+    typedef typename LDLTType::RealScalar RealScalar;
+    const Diagonal<const MatrixType> vectorD = dec().vectorD();
+    RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits<Scalar>::epsilon(),
+				 RealScalar(1) / NumTraits<RealScalar>::highest()); // motivated by LAPACK's xGELSS
+    for (Index i = 0; i < vectorD.size(); ++i) {
+      if(abs(vectorD(i)) > tolerance)
+	dst.row(i) /= vectorD(i);
+      else
+	dst.row(i).setZero();
+    }
+
+    // dst = L^-T (D^-1 L^-1 P b)
+    dec().matrixU().solveInPlace(dst);
+
+    // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
+    dst = dec().transpositionsP().transpose() * dst;
  }
-
-  // dst = L^-T (D^-1 L^-1 P b)
-  matrixU().solveInPlace(dst);
-
-  // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
-  dst = m_transpositions.transpose() * dst;
+};
 }
-#endif

 /** \internal use x = ldlt_object.solve(x);
  *
@@ -560,7 +557,7 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
  // L^* P
  res = matrixU() * res;
  // D(L^*P)
-  res = vectorD().real().asDiagonal() * res;
+  res = vectorD().asDiagonal() * res;
  // L(DL^*P)
  res = matrixL() * res;
  // P^T (LDL^*P)
@@ -569,10 +566,8 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
  return res;
 }

-#ifndef __CUDACC__
 /** \cholesky_module
  * \returns the Cholesky decomposition with full pivoting without square root of \c *this
-  * \sa MatrixBase::ldlt()
  */
 template<typename MatrixType, unsigned int UpLo>
 inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
@@ -583,7 +578,6 @@ SelfAdjointView<MatrixType, UpLo>::ldlt() const

 /** \cholesky_module
  * \returns the Cholesky decomposition with full pivoting without square root of \c *this
-  * \sa SelfAdjointView::ldlt()
  */
 template<typename Derived>
 inline const LDLT<typename MatrixBase<Derived>::PlainObject>
@@ -591,7 +585,6 @@ MatrixBase<Derived>::ldlt() const
 {
  return LDLT<PlainObject>(derived());
 }
-#endif // __CUDACC__

 } // end namespace Eigen

--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -41,7 +41,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
  * Example: \include LLT_example.cpp
  * Output: \verbinclude LLT_example.out
  *    
-  * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
+  * \sa MatrixBase::llt(), class LDLT
  */
 /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
  * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
@@ -59,8 +59,7 @@ template<typename _MatrixType, int _UpLo> class LLT
    };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
-    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef typename MatrixType::Index Index;

    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
@@ -84,10 +83,10 @@ template<typename _MatrixType, int _UpLo> class LLT
      * according to the specified problem \a size.
      * \sa LLT()
      */
-    explicit LLT(Index size) : m_matrix(size, size),
+    LLT(Index size) : m_matrix(size, size),
                    m_isInitialized(false) {}

-    explicit LLT(const MatrixType& matrix)
+    LLT(const MatrixType& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
        m_isInitialized(false)
    {
@@ -116,18 +115,29 @@ template<typename _MatrixType, int _UpLo> class LLT
      * Example: \include LLT_solve.cpp
      * Output: \verbinclude LLT_solve.out
      *
-      * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
+      * \sa solveInPlace(), MatrixBase::llt()
      */
    template<typename Rhs>
-    inline const Solve<LLT, Rhs>
+    inline const internal::solve_retval<LLT, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "LLT is not initialized.");
      eigen_assert(m_matrix.rows()==b.rows()
                && "LLT::solve(): invalid number of rows of the right hand side matrix b");
-      return Solve<LLT, Rhs>(*this, b.derived());
+      return internal::solve_retval<LLT, Rhs>(*this, b.derived());
    }

+    #ifdef EIGEN2_SUPPORT
+    template<typename OtherDerived, typename ResultType>
+    bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const
+    {
+      *result = this->solve(b);
+      return true;
+    }
+    
+    bool isPositiveDefinite() const { return true; }
+    #endif
+
    template<typename Derived>
    void solveInPlace(MatrixBase<Derived> &bAndX) const;

@@ -162,20 +172,8 @@ template<typename _MatrixType, int _UpLo> class LLT

    template<typename VectorType>
    LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
-    
-    #ifndef EIGEN_PARSED_BY_DOXYGEN
-    template<typename RhsType, typename DstType>
-    EIGEN_DEVICE_FUNC
-    void _solve_impl(const RhsType &rhs, DstType &dst) const;
-    #endif

  protected:
-    
-    static void check_template_parameters()
-    {
-      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
-    }
-    
    /** \internal
      * Used to compute and store L
      * The strict upper part is not used and even not initialized.
@@ -190,18 +188,18 @@ namespace internal {
 template<typename Scalar, int UpLo> struct llt_inplace;

 template<typename MatrixType, typename VectorType>
-static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)
+static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)
 {
-  using std::sqrt;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::Index Index;
  typedef typename MatrixType::ColXpr ColXpr;
  typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
  typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
  typedef Matrix<Scalar,Dynamic,1> TempVectorType;
  typedef typename TempVectorType::SegmentReturnType TempVecSegment;

-  Index n = mat.cols();
+  int n = mat.cols();
  eigen_assert(mat.rows()==n && vec.size()==n);

  TempVectorType temp;
@@ -213,12 +211,12 @@ static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const
    // i.e., for sigma > 0
    temp = sqrt(sigma) * vec;

-    for(Index i=0; i<n; ++i)
+    for(int i=0; i<n; ++i)
    {
      JacobiRotation<Scalar> g;
      g.makeGivens(mat(i,i), -temp(i), &mat(i,i));

-      Index rs = n-i-1;
+      int rs = n-i-1;
      if(rs>0)
      {
        ColXprSegment x(mat.col(i).tail(rs));
@@ -231,12 +229,12 @@ static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const
  {
    temp = vec;
    RealScalar beta = 1;
-    for(Index j=0; j<n; ++j)
+    for(int j=0; j<n; ++j)
    {
-      RealScalar Ljj = numext::real(mat.coeff(j,j));
-      RealScalar dj = numext::abs2(Ljj);
+      RealScalar Ljj = real(mat.coeff(j,j));
+      RealScalar dj = abs2(Ljj);
      Scalar wj = temp.coeff(j);
-      RealScalar swj2 = sigma*numext::abs2(wj);
+      RealScalar swj2 = sigma*abs2(wj);
      RealScalar gamma = dj*beta + swj2;

      RealScalar x = dj + swj2/beta;
@@ -252,7 +250,7 @@ static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const
      {
        temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
        if(gamma != 0)
-          mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*numext::conj(wj)/gamma)*temp.tail(rs);
+          mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*conj(wj)/gamma)*temp.tail(rs);
      }
    }
  }
@@ -263,9 +261,9 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
  template<typename MatrixType>
-  static Index unblocked(MatrixType& mat)
+  static typename MatrixType::Index unblocked(MatrixType& mat)
  {
-    using std::sqrt;
+    typedef typename MatrixType::Index Index;
    
    eigen_assert(mat.rows()==mat.cols());
    const Index size = mat.rows();
@@ -277,7 +275,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
      Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);
      Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);

-      RealScalar x = numext::real(mat.coeff(k,k));
+      RealScalar x = real(mat.coeff(k,k));
      if (k>0) x -= A10.squaredNorm();
      if (x<=RealScalar(0))
        return k;
@@ -289,8 +287,9 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
  }

  template<typename MatrixType>
-  static Index blocked(MatrixType& m)
+  static typename MatrixType::Index blocked(MatrixType& m)
  {
+    typedef typename MatrixType::Index Index;
    eigen_assert(m.rows()==m.cols());
    Index size = m.rows();
    if(size<32)
@@ -321,7 +320,7 @@ template<typename Scalar> struct llt_inplace<Scalar, Lower>
  }

  template<typename MatrixType, typename VectorType>
-  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
+  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
  {
    return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);
  }
@@ -332,19 +331,19 @@ template<typename Scalar> struct llt_inplace<Scalar, Upper>
  typedef typename NumTraits<Scalar>::Real RealScalar;

  template<typename MatrixType>
-  static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat)
+  static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat)
  {
    Transpose<MatrixType> matt(mat);
    return llt_inplace<Scalar, Lower>::unblocked(matt);
  }
  template<typename MatrixType>
-  static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat)
+  static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat)
  {
    Transpose<MatrixType> matt(mat);
    return llt_inplace<Scalar, Lower>::blocked(matt);
  }
  template<typename MatrixType, typename VectorType>
-  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
+  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
  {
    Transpose<MatrixType> matt(mat);
    return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
@@ -355,8 +354,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
 {
  typedef const TriangularView<const MatrixType, Lower> MatrixL;
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
-  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
+  static inline MatrixL getL(const MatrixType& m) { return m; }
+  static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
  static bool inplace_decomposition(MatrixType& m)
  { return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
 };
@@ -365,8 +364,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
 {
  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
  typedef const TriangularView<const MatrixType, Upper> MatrixU;
-  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
-  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
+  static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); }
+  static inline MatrixU getU(const MatrixType& m) { return m; }
  static bool inplace_decomposition(MatrixType& m)
  { return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
 };
@@ -383,8 +382,6 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
 template<typename MatrixType, int _UpLo>
 LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
 {
-  check_template_parameters();
-  
  eigen_assert(a.rows()==a.cols());
  const Index size = a.rows();
  m_matrix.resize(size, size);
@@ -416,16 +413,22 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c

  return *this;
 }
- 
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-template<typename _MatrixType,int _UpLo>
-template<typename RhsType, typename DstType>
-void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
+    
+namespace internal {
+template<typename _MatrixType, int UpLo, typename Rhs>
+struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
+  : solve_retval_base<LLT<_MatrixType, UpLo>, Rhs>
 {
-  dst = rhs;
-  solveInPlace(dst);
+  typedef LLT<_MatrixType,UpLo> LLTType;
+  EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    dst = rhs();
+    dec().solveInPlace(dst);
+  }
+};
 }
-#endif

 /** \internal use x = llt_object.solve(x);
  * 
@@ -460,10 +463,8 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
  return matrixL() * matrixL().adjoint().toDenseMatrix();
 }

-#ifndef __CUDACC__
 /** \cholesky_module
  * \returns the LLT decomposition of \c *this
-  * \sa SelfAdjointView::llt()
  */
 template<typename Derived>
 inline const LLT<typename MatrixBase<Derived>::PlainObject>
@@ -474,7 +475,6 @@ MatrixBase<Derived>::llt() const

 /** \cholesky_module
  * \returns the LLT decomposition of \c *this
-  * \sa SelfAdjointView::llt()
  */
 template<typename MatrixType, unsigned int UpLo>
 inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
@@ -482,8 +482,7 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
 {
  return LLT<PlainObject,UpLo>(m_matrix);
 }
-#endif // __CUDACC__
-  
+
 } // end namespace Eigen

 #endif // EIGEN_LLT_H
--- a/Eigen/src/Cholesky/LLT_MKL.h
+++ b/Eigen/src/Cholesky/LLT_MKL.h
@@ -46,7 +46,7 @@ template<typename Scalar> struct mkl_llt;
 template<> struct mkl_llt<EIGTYPE> \
 { \
  template<typename MatrixType> \
-  static inline Index potrf(MatrixType& m, char uplo) \
+  static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
  { \
    lapack_int matrix_order; \
    lapack_int size, lda, info, StorageOrder; \
@@ -60,30 +60,30 @@ template<> struct mkl_llt<EIGTYPE> \
    lda = m.outerStride(); \
 \
    info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
-    info = (info==0) ? -1 : info>0 ? info-1 : size; \
+    info = (info==0) ? Success : NumericalIssue; \
    return info; \
  } \
 }; \
 template<> struct llt_inplace<EIGTYPE, Lower> \
 { \
  template<typename MatrixType> \
-  static Index blocked(MatrixType& m) \
+  static typename MatrixType::Index blocked(MatrixType& m) \
  { \
    return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
  } \
  template<typename MatrixType, typename VectorType> \
-  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
+  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
  { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \
 }; \
 template<> struct llt_inplace<EIGTYPE, Upper> \
 { \
  template<typename MatrixType> \
-  static Index blocked(MatrixType& m) \
+  static typename MatrixType::Index blocked(MatrixType& m) \
  { \
    return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
  } \
  template<typename MatrixType, typename VectorType> \
-  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
+  static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
  { \
    Transpose<MatrixType> matt(mat); \
    return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -48,9 +48,10 @@ void cholmod_configure_matrix(CholmodType& mat)
 /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object.
  * Note that the data are shared.
  */
-template<typename _Scalar, int _Options, typename _StorageIndex>
-cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
+template<typename _Scalar, int _Options, typename _Index>
+cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
 {
+  typedef SparseMatrix<_Scalar,_Options,_Index> MatrixType;
  cholmod_sparse res;
  res.nzmax   = mat.nonZeros();
  res.nrow    = mat.rows();;
@@ -58,12 +59,10 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
  res.p       = mat.outerIndexPtr();
  res.i       = mat.innerIndexPtr();
  res.x       = mat.valuePtr();
-  res.z       = 0;
  res.sorted  = 1;
  if(mat.isCompressed())
  {
    res.packed  = 1;
-    res.nz = 0;
  }
  else
  {
@@ -74,17 +73,13 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
  res.dtype   = 0;
  res.stype   = -1;
  
-  if (internal::is_same<_StorageIndex,int>::value)
+  if (internal::is_same<_Index,int>::value)
  {
    res.itype = CHOLMOD_INT;
  }
-  else if (internal::is_same<_StorageIndex,UF_long>::value)
-  {
-    res.itype = CHOLMOD_LONG;
-  }
  else
  {
-    eigen_assert(false && "Index type not supported yet");
+    eigen_assert(false && "Index type different than int is not supported yet");
  }

  // setup res.xtype
@@ -105,7 +100,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>&
 /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix.
  * The data are not copied but shared. */
 template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
-cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
+cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
 {
  cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived());
  
@@ -128,7 +123,7 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
  res.ncol   = mat.cols();
  res.nzmax  = res.nrow * res.ncol;
  res.d      = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride();
-  res.x      = (void*)(mat.derived().data());
+  res.x      = mat.derived().data();
  res.z      = 0;

  internal::cholmod_configure_matrix<Scalar>(res);
@@ -138,12 +133,12 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)

 /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix.
  * The data are not copied but shared. */
-template<typename Scalar, int Flags, typename StorageIndex>
-MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm)
+template<typename Scalar, int Flags, typename Index>
+MappedSparseMatrix<Scalar,Flags,Index> viewAsEigen(cholmod_sparse& cm)
 {
-  return MappedSparseMatrix<Scalar,Flags,StorageIndex>
-         (cm.nrow, cm.ncol, static_cast<StorageIndex*>(cm.p)[cm.ncol],
-          static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) );
+  return MappedSparseMatrix<Scalar,Flags,Index>
+         (cm.nrow, cm.ncol, reinterpret_cast<Index*>(cm.p)[cm.ncol],
+          reinterpret_cast<Index*>(cm.p), reinterpret_cast<Index*>(cm.i),reinterpret_cast<Scalar*>(cm.x) );
 }

 enum CholmodMode {
@@ -157,33 +152,27 @@ enum CholmodMode {
  * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT
  */
 template<typename _MatrixType, int _UpLo, typename Derived>
-class CholmodBase : public SparseSolverBase<Derived>
+class CholmodBase : internal::noncopyable
 {
-  protected:
-    typedef SparseSolverBase<Derived> Base;
-    using Base::derived;
-    using Base::m_isInitialized;
  public:
    typedef _MatrixType MatrixType;
    enum { UpLo = _UpLo };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
    typedef MatrixType CholMatrixType;
-    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef typename MatrixType::Index Index;

  public:

    CholmodBase()
-      : m_cholmodFactor(0), m_info(Success)
+      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
    {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
      cholmod_start(&m_cholmod);
    }

-    explicit CholmodBase(const MatrixType& matrix)
-      : m_cholmodFactor(0), m_info(Success)
+    CholmodBase(const MatrixType& matrix)
+      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
    {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
      cholmod_start(&m_cholmod);
      compute(matrix);
    }
@@ -195,8 +184,11 @@ class CholmodBase : public SparseSolverBase<Derived>
      cholmod_finish(&m_cholmod);
    }
    
-    inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
-    inline StorageIndex rows() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
+    inline Index cols() const { return m_cholmodFactor->n; }
+    inline Index rows() const { return m_cholmodFactor->n; }
+    
+    Derived& derived() { return *static_cast<Derived*>(this); }
+    const Derived& derived() const { return *static_cast<const Derived*>(this); }
    
    /** \brief Reports whether previous computation was successful.
      *
@@ -217,7 +209,35 @@ class CholmodBase : public SparseSolverBase<Derived>
      return derived();
    }
    
-    /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
+    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      *
+      * \sa compute()
+      */
+    template<typename Rhs>
+    inline const internal::solve_retval<CholmodBase, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      eigen_assert(rows()==b.rows()
+                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
+      return internal::solve_retval<CholmodBase, Rhs>(*this, b.derived());
+    }
+    
+    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      *
+      * \sa compute()
+      */
+    template<typename Rhs>
+    inline const internal::sparse_solve_retval<CholmodBase, Rhs>
+    solve(const SparseMatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      eigen_assert(rows()==b.rows()
+                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
+      return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived());
+    }
+    
+    /** Performs a symbolic decomposition on the sparcity of \a matrix.
      *
      * This function is particularly useful when solving for several problems having the same structure.
      * 
@@ -241,7 +261,7 @@ class CholmodBase : public SparseSolverBase<Derived>
    
    /** Performs a numeric decomposition of \a matrix
      *
-      * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
+      * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed.
      *
      * \sa analyzePattern()
      */
@@ -249,10 +269,9 @@ class CholmodBase : public SparseSolverBase<Derived>
    {
      eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
      cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
-      cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
+      cholmod_factorize(&A, m_cholmodFactor, &m_cholmod);
      
-      // If the factorization failed, minor is the column at which it did. On success minor == n.
-      this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
+      this->m_info = Success;
      m_factorizationIsOk = true;
    }
    
@@ -263,34 +282,30 @@ class CholmodBase : public SparseSolverBase<Derived>
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal */
    template<typename Rhs,typename Dest>
-    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
    {
      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
      const Index size = m_cholmodFactor->n;
-      EIGEN_UNUSED_VARIABLE(size);
      eigen_assert(size==b.rows());

      // note: cd stands for Cholmod Dense
-      Rhs& b_ref(b.const_cast_derived());
-      cholmod_dense b_cd = viewAsCholmod(b_ref);
+      cholmod_dense b_cd = viewAsCholmod(b.const_cast_derived());
      cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
      if(!x_cd)
      {
        this->m_info = NumericalIssue;
-        return;
      }
-      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
+      // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.)
      dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
      cholmod_free_dense(&x_cd, &m_cholmod);
    }
    
    /** \internal */
    template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
-    void _solve_impl(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+    void _solve(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
    {
      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
      const Index size = m_cholmodFactor->n;
-      EIGEN_UNUSED_VARIABLE(size);
      eigen_assert(size==b.rows());

      // note: cs stands for Cholmod Sparse
@@ -299,39 +314,22 @@ class CholmodBase : public SparseSolverBase<Derived>
      if(!x_cs)
      {
        this->m_info = NumericalIssue;
-        return;
      }
-      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
+      // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.)
      dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
      cholmod_free_sparse(&x_cs, &m_cholmod);
    }
    #endif // EIGEN_PARSED_BY_DOXYGEN
    
-    
-    /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization.
-      *
-      * During the numerical factorization, an offset term is added to the diagonal coefficients:\n
-      * \c d_ii = \a offset + \c d_ii
-      *
-      * The default is \a offset=0.
-      *
-      * \returns a reference to \c *this.
-      */
-    Derived& setShift(const RealScalar& offset)
-    {
-      m_shiftOffset[0] = offset;
-      return derived();
-    }
-    
    template<typename Stream>
-    void dumpMemory(Stream& /*s*/)
+    void dumpMemory(Stream& s)
    {}
    
  protected:
    mutable cholmod_common m_cholmod;
    cholmod_factor* m_cholmodFactor;
-    RealScalar m_shiftOffset[2];
    mutable ComputationInfo m_info;
+    bool m_isInitialized;
    int m_factorizationIsOk;
    int m_analysisIsOk;
 };
@@ -342,8 +340,8 @@ class CholmodBase : public SparseSolverBase<Derived>
  *
  * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization
  * using the Cholmod library.
-  * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest.
-  * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
+  * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Thefore, it has little practical interest.
+  * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
@@ -369,7 +367,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
    CholmodSimplicialLLT(const MatrixType& matrix) : Base()
    {
      init();
-      this->compute(matrix);
+      compute(matrix);
    }

    ~CholmodSimplicialLLT() {}
@@ -389,8 +387,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
  *
  * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization
  * using the Cholmod library.
-  * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest.
-  * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
+  * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Thefore, it has little practical interest.
+  * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
@@ -416,7 +414,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
    CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
    {
      init();
-      this->compute(matrix);
+      compute(matrix);
    }

    ~CholmodSimplicialLDLT() {}
@@ -435,7 +433,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
  * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization
  * using the Cholmod library.
  * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM.
-  * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
+  * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
@@ -461,7 +459,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
    CholmodSupernodalLLT(const MatrixType& matrix) : Base()
    {
      init();
-      this->compute(matrix);
+      compute(matrix);
    }

    ~CholmodSupernodalLLT() {}
@@ -478,7 +476,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
  * \brief A general Cholesky factorization and solver based on Cholmod
  *
  * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization
-  * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
+  * using the Cholmod library. The sparse matrix A must be selfajoint and positive definite. The vectors or matrices
  * X and B can be either dense or sparse.
  *
  * This variant permits to change the underlying Cholesky method at runtime.
@@ -508,7 +506,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
    CholmodDecomposition(const MatrixType& matrix) : Base()
    {
      init();
-      this->compute(matrix);
+      compute(matrix);
    }

    ~CholmodDecomposition() {}
@@ -546,6 +544,36 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
    }
 };

+namespace internal {
+  
+template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
+struct solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
+  : solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
+{
+  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
+  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    dec()._solve(rhs(),dst);
+  }
+};
+
+template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
+struct sparse_solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
+  : sparse_solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
+{
+  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
+  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    dec()._solve(rhs(),dst);
+  }
+};
+
+} // end namespace internal
+
 } // end namespace Eigen

 #endif // EIGEN_CHOLMODSUPPORT_H
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -24,9 +24,6 @@ namespace Eigen {
  * API for the %Matrix class provides easy access to linear-algebra
  * operations.
  *
-  * See documentation of class Matrix for detailed information on the template parameters
-  * storage layout.
-  * 
  * This class can be extended with the help of the plugin mechanism described on the page
  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
  *
@@ -72,27 +69,11 @@ class Array
      * the usage of 'using'. This should be done only for operator=.
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
    {
      return Base::operator=(other);
    }

-    /** Set all the entries to \a value.
-      * \sa DenseBase::setConstant(), DenseBase::fill()
-      */
-    /* This overload is needed because the usage of
-      *   using Base::operator=;
-      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
-      * the usage of 'using'. This should be done only for operator=.
-      */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
-    {
-      Base::setConstant(value);
-      return *this;
-    }
-
    /** Copies the value of the expression \a other into \c *this with automatic resizing.
      *
      * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
@@ -103,8 +84,7 @@ class Array
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
+    EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
    {
      return Base::_set(other);
    }
@@ -112,12 +92,11 @@ class Array
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array& operator=(const Array& other)
    {
      return Base::_set(other);
    }
-    
+
    /** Default constructor.
      *
      * For fixed-size matrices, does nothing.
@@ -128,118 +107,122 @@ class Array
      *
      * \sa resize(Index,Index)
      */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array() : Base()
+    EIGEN_STRONG_INLINE explicit Array() : Base()
    {
      Base::_check_template_params();
-      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
    // FIXME is it still needed ??
    /** \internal */
-    EIGEN_DEVICE_FUNC
    Array(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
    {
      Base::_check_template_params();
-      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }
 #endif

-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    EIGEN_DEVICE_FUNC
-    Array(Array&& other)
-      : Base(std::move(other))
-    {
-      Base::_check_template_params();
-      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-        Base::_set_noalias(other);
-    }
-    EIGEN_DEVICE_FUNC
-    Array& operator=(Array&& other)
-    {
-      other.swap(*this);
-      return *this;
-    }
-#endif
-
-    #ifndef EIGEN_PARSED_BY_DOXYGEN
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE explicit Array(const T& x)
-    {
-      Base::_check_template_params();
-      Base::template _init1<T>(x);
-    }
-
-    template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
-    {
-      Base::_check_template_params();
-      this->template _init2<T0,T1>(val0, val1);
-    }
-    #else
-    /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
-    EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
      *
      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
      * it is redundant to pass the dimension here, so it makes more sense to use the default
-      * constructor Array() instead.
+      * constructor Matrix() instead.
      */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE explicit Array(Index dim);
-    /** constructs an initialized 1x1 Array with the given coefficient */
-    Array(const Scalar& value);
-    /** constructs an uninitialized array with \a rows rows and \a cols columns.
+    EIGEN_STRONG_INLINE explicit Array(Index dim)
+      : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
+    {
+      Base::_check_template_params();
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
+      eigen_assert(dim >= 0);
+      eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
+      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+    }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename T0, typename T1>
+    EIGEN_STRONG_INLINE Array(const T0& x, const T1& y)
+    {
+      Base::_check_template_params();
+      this->template _init2<T0,T1>(x, y);
+    }
+    #else
+    /** constructs an uninitialized matrix with \a rows rows and \a cols columns.
      *
-      * This is useful for dynamic-size arrays. For fixed-size arrays,
+      * This is useful for dynamic-size matrices. For fixed-size matrices,
      * it is redundant to pass these parameters, so one should use the default constructor
-      * Array() instead. */
+      * Matrix() instead. */
    Array(Index rows, Index cols);
    /** constructs an initialized 2D vector with given coefficients */
-    Array(const Scalar& val0, const Scalar& val1);
+    Array(const Scalar& x, const Scalar& y);
    #endif

    /** constructs an initialized 3D vector with given coefficients */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
+    EIGEN_STRONG_INLINE Array(const Scalar& x, const Scalar& y, const Scalar& z)
    {
      Base::_check_template_params();
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
-      m_storage.data()[0] = val0;
-      m_storage.data()[1] = val1;
-      m_storage.data()[2] = val2;
+      m_storage.data()[0] = x;
+      m_storage.data()[1] = y;
+      m_storage.data()[2] = z;
    }
    /** constructs an initialized 4D vector with given coefficients */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
+    EIGEN_STRONG_INLINE Array(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
    {
      Base::_check_template_params();
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
-      m_storage.data()[0] = val0;
-      m_storage.data()[1] = val1;
-      m_storage.data()[2] = val2;
-      m_storage.data()[3] = val3;
+      m_storage.data()[0] = x;
+      m_storage.data()[1] = y;
+      m_storage.data()[2] = z;
+      m_storage.data()[3] = w;
    }

+    explicit Array(const Scalar *data);
+
+    /** Constructor copying the value of the expression \a other */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
+             : Base(other.rows() * other.cols(), other.rows(), other.cols())
+    {
+      Base::_check_template_params();
+      Base::_set_noalias(other);
+    }
    /** Copy constructor */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array(const Array& other)
-            : Base(other)
-    { }
+            : Base(other.rows() * other.cols(), other.rows(), other.cols())
+    {
+      Base::_check_template_params();
+      Base::_set_noalias(other);
+    }
+    /** Copy constructor with in-place evaluation */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
+    {
+      Base::_check_template_params();
+      Base::resize(other.rows(), other.cols());
+      other.evalTo(*this);
+    }

    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
-      : Base(other.derived())
-    { }
+      : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+    {
+      Base::_check_template_params();
+      Base::resize(other.rows(), other.cols());
+      *this = other;
+    }

-    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
-    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+    /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the
+      * data pointers.
+      */
+    template<typename OtherDerived>
+    void swap(ArrayBase<OtherDerived> const & other)
+    { this->_swap(other.derived()); }
+
+    inline Index innerStride() const { return 1; }
+    inline Index outerStride() const { return this->innerSize(); }

    #ifdef EIGEN_ARRAY_PLUGIN
    #include EIGEN_ARRAY_PLUGIN
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -50,6 +50,7 @@ template<typename Derived> class ArrayBase
                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;

    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -63,7 +64,8 @@ template<typename Derived> class ArrayBase
    using Base::MaxSizeAtCompileTime;
    using Base::IsVectorAtCompileTime;
    using Base::Flags;
-    
+    using Base::CoeffReadCost;
+
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@@ -83,10 +85,22 @@ template<typename Derived> class ArrayBase
 #endif // not EIGEN_PARSED_BY_DOXYGEN

 #ifndef EIGEN_PARSED_BY_DOXYGEN
-    typedef typename Base::PlainObject PlainObject;
+    /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
+      * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
+      * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
+      * PlainObject or const PlainObject&.
+      */
+    typedef Array<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainObject;
+

    /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
 #endif // not EIGEN_PARSED_BY_DOXYGEN

 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
@@ -104,57 +118,40 @@ template<typename Derived> class ArrayBase
    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const ArrayBase& other)
    {
-      internal::call_assignment(derived(), other.derived());
-      return derived();
+      return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
    }
-    
-    /** Set all the entries to \a value.
-      * \sa DenseBase::setConstant(), DenseBase::fill() */
-    EIGEN_DEVICE_FUNC
-    Derived& operator=(const Scalar &value)
-    { Base::setConstant(value); return derived(); }

-    EIGEN_DEVICE_FUNC
-    Derived& operator+=(const Scalar& scalar);
-    EIGEN_DEVICE_FUNC
-    Derived& operator-=(const Scalar& scalar);
+    Derived& operator+=(const Scalar& scalar)
+    { return *this = derived() + scalar; }
+    Derived& operator-=(const Scalar& scalar)
+    { return *this = derived() - scalar; }

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator+=(const ArrayBase<OtherDerived>& other);
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator-=(const ArrayBase<OtherDerived>& other);

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator*=(const ArrayBase<OtherDerived>& other);

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator/=(const ArrayBase<OtherDerived>& other);

  public:
-    EIGEN_DEVICE_FUNC
    ArrayBase<Derived>& array() { return *this; }
-    EIGEN_DEVICE_FUNC
    const ArrayBase<Derived>& array() const { return *this; }

-    /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
+    /** \returns an \link MatrixBase Matrix \endlink expression of this array
      * \sa MatrixBase::array() */
-    EIGEN_DEVICE_FUNC
-    MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
-    EIGEN_DEVICE_FUNC
-    const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }
+    MatrixWrapper<Derived> matrix() { return derived(); }
+    const MatrixWrapper<const Derived> matrix() const { return derived(); }

 //     template<typename Dest>
 //     inline void evalTo(Dest& dst) const { dst = matrix(); }

  protected:
-    EIGEN_DEVICE_FUNC
    ArrayBase() : Base() {}

  private:
@@ -179,7 +176,8 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
 {
-  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
+  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  tmp = other.derived();
  return derived();
 }

@@ -192,7 +190,8 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
 {
-  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
+  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  tmp = other.derived();
  return derived();
 }

@@ -205,7 +204,8 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
 {
-  call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
+  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  tmp = other.derived();
  return derived();
 }

@@ -218,7 +218,8 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
 {
-  call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar>());
+  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  tmp = other.derived();
  return derived();
 }

--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -29,11 +29,6 @@ struct traits<ArrayWrapper<ExpressionType> >
  : public traits<typename remove_all<typename ExpressionType::Nested>::type >
 {
  typedef ArrayXpr XprKind;
-  // Let's remove NestByRefBit
-  enum {
-    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
-    Flags = Flags0 & ~NestByRefBit
-  };
 };
 }

@@ -44,7 +39,6 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    typedef ArrayBase<ArrayWrapper> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
-    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;

    typedef typename internal::conditional<
                       internal::is_lvalue<ExpressionType>::value,
@@ -52,71 +46,58 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

-    typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;

-    EIGEN_DEVICE_FUNC
-    explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+    inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}

-    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_expression.rows(); }
-    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_expression.cols(); }
-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return m_expression.outerStride(); }
-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return m_expression.innerStride(); }

-    EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
-    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
    inline const Scalar* data() const { return m_expression.data(); }

-    EIGEN_DEVICE_FUNC
-    inline CoeffReturnType coeff(Index rowId, Index colId) const
+    inline CoeffReturnType coeff(Index row, Index col) const
    {
-      return m_expression.coeff(rowId, colId);
+      return m_expression.coeff(row, col);
    }

-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index rowId, Index colId)
+    inline Scalar& coeffRef(Index row, Index col)
    {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    inline const Scalar& coeffRef(Index row, Index col) const
    {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    EIGEN_DEVICE_FUNC
    inline CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }

-    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }

-    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }

    template<int LoadMode>
-    inline const PacketScalar packet(Index rowId, Index colId) const
+    inline const PacketScalar packet(Index row, Index col) const
    {
-      return m_expression.template packet<LoadMode>(rowId, colId);
+      return m_expression.template packet<LoadMode>(row, col);
    }

    template<int LoadMode>
-    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
    {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+      m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x);
    }

    template<int LoadMode>
@@ -126,17 +107,15 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
    }

    template<int LoadMode>
-    inline void writePacket(Index index, const PacketScalar& val)
+    inline void writePacket(Index index, const PacketScalar& x)
    {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
    }

    template<typename Dest>
-    EIGEN_DEVICE_FUNC
    inline void evalTo(Dest& dst) const { dst = m_expression; }

    const typename internal::remove_all<NestedExpressionType>::type& 
-    EIGEN_DEVICE_FUNC
    nestedExpression() const 
    {
      return m_expression;
@@ -144,12 +123,10 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >

    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index)  */
-    EIGEN_DEVICE_FUNC
    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index,Index)*/
-    EIGEN_DEVICE_FUNC
-    void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+    void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }

  protected:
    NestedExpressionType m_expression;
@@ -172,11 +149,6 @@ struct traits<MatrixWrapper<ExpressionType> >
 : public traits<typename remove_all<typename ExpressionType::Nested>::type >
 {
  typedef MatrixXpr XprKind;
-  // Let's remove NestByRefBit
-  enum {
-    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
-    Flags = Flags0 & ~NestByRefBit
-  };
 };
 }

@@ -187,7 +159,6 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
-    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;

    typedef typename internal::conditional<
                       internal::is_lvalue<ExpressionType>::value,
@@ -195,71 +166,58 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

-    typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::nested<ExpressionType>::type NestedExpressionType;

-    EIGEN_DEVICE_FUNC
-    explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+    inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}

-    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_expression.rows(); }
-    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_expression.cols(); }
-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return m_expression.outerStride(); }
-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return m_expression.innerStride(); }

-    EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
-    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
    inline const Scalar* data() const { return m_expression.data(); }

-    EIGEN_DEVICE_FUNC
-    inline CoeffReturnType coeff(Index rowId, Index colId) const
+    inline CoeffReturnType coeff(Index row, Index col) const
    {
-      return m_expression.coeff(rowId, colId);
+      return m_expression.coeff(row, col);
    }

-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index rowId, Index colId)
+    inline Scalar& coeffRef(Index row, Index col)
    {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    inline const Scalar& coeffRef(Index row, Index col) const
    {
-      return m_expression.derived().coeffRef(rowId, colId);
+      return m_expression.derived().coeffRef(row, col);
    }

-    EIGEN_DEVICE_FUNC
    inline CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }

-    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }

-    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }

    template<int LoadMode>
-    inline const PacketScalar packet(Index rowId, Index colId) const
+    inline const PacketScalar packet(Index row, Index col) const
    {
-      return m_expression.template packet<LoadMode>(rowId, colId);
+      return m_expression.template packet<LoadMode>(row, col);
    }

    template<int LoadMode>
-    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
    {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+      m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x);
    }

    template<int LoadMode>
@@ -269,12 +227,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    }

    template<int LoadMode>
-    inline void writePacket(Index index, const PacketScalar& val)
+    inline void writePacket(Index index, const PacketScalar& x)
    {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
    }

-    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<NestedExpressionType>::type& 
    nestedExpression() const 
    {
@@ -283,12 +240,10 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >

    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index)  */
-    EIGEN_DEVICE_FUNC
    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index,Index)*/
-    EIGEN_DEVICE_FUNC
-    void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+    void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }

  protected:
    NestedExpressionType m_expression;
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -14,6 +14,471 @@

 namespace Eigen {

+namespace internal {
+
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for traversal and unrolling       *
+***************************************************************************/
+
+template <typename Derived, typename OtherDerived>
+struct assign_traits
+{
+public:
+  enum {
+    DstIsAligned = Derived::Flags & AlignedBit,
+    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
+    SrcIsAligned = OtherDerived::Flags & AlignedBit,
+    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
+  };
+
+private:
+  enum {
+    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
+              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
+              : int(Derived::RowsAtCompileTime),
+    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
+              : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
+              : int(Derived::MaxRowsAtCompileTime),
+    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
+    PacketSize = packet_traits<typename Derived::Scalar>::size
+  };
+
+  enum {
+    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
+    MightVectorize = StorageOrdersAgree
+                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
+    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+                       && int(DstIsAligned) && int(SrcIsAligned),
+    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
+                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+         so it's only good for large enough sizes. */
+    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
+                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
+      /* slice vectorization can be slow, so we only want it if the slices are big, which is
+         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+         in a fixed-size matrix */
+  };
+
+public:
+  enum {
+    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
+              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
+              : int(MayLinearize)       ? int(LinearTraversal)
+                                        : int(DefaultTraversal),
+    Vectorized = int(Traversal) == InnerVectorizedTraversal
+              || int(Traversal) == LinearVectorizedTraversal
+              || int(Traversal) == SliceVectorizedTraversal
+  };
+
+private:
+  enum {
+    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
+    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
+                       && int(OtherDerived::CoeffReadCost) != Dynamic
+                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
+    MayUnrollInner      = int(InnerSize) != Dynamic
+                       && int(OtherDerived::CoeffReadCost) != Dynamic
+                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
+  };
+
+public:
+  enum {
+    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+                ? (
+                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
+                  : int(MayUnrollInner)      ? int(InnerUnrolling)
+                                             : int(NoUnrolling)
+                  )
+              : int(Traversal) == int(LinearVectorizedTraversal)
+                ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
+              : int(Traversal) == int(LinearTraversal)
+                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
+              : int(NoUnrolling)
+  };
+
+#ifdef EIGEN_DEBUG_ASSIGN
+  static void debug()
+  {
+    EIGEN_DEBUG_VAR(DstIsAligned)
+    EIGEN_DEBUG_VAR(SrcIsAligned)
+    EIGEN_DEBUG_VAR(JointAlignment)
+    EIGEN_DEBUG_VAR(InnerSize)
+    EIGEN_DEBUG_VAR(InnerMaxSize)
+    EIGEN_DEBUG_VAR(PacketSize)
+    EIGEN_DEBUG_VAR(StorageOrdersAgree)
+    EIGEN_DEBUG_VAR(MightVectorize)
+    EIGEN_DEBUG_VAR(MayLinearize)
+    EIGEN_DEBUG_VAR(MayInnerVectorize)
+    EIGEN_DEBUG_VAR(MayLinearVectorize)
+    EIGEN_DEBUG_VAR(MaySliceVectorize)
+    EIGEN_DEBUG_VAR(Traversal)
+    EIGEN_DEBUG_VAR(UnrollingLimit)
+    EIGEN_DEBUG_VAR(MayUnrollCompletely)
+    EIGEN_DEBUG_VAR(MayUnrollInner)
+    EIGEN_DEBUG_VAR(Unrolling)
+  }
+#endif
+};
+
+/***************************************************************************
+* Part 2 : meta-unrollers
+***************************************************************************/
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_DefaultTraversal_CompleteUnrolling
+{
+  enum {
+    outer = Index / Derived1::InnerSizeAtCompileTime,
+    inner = Index % Derived1::InnerSizeAtCompileTime
+  };
+
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    dst.copyCoeffByOuterInner(outer, inner, src);
+    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_DefaultTraversal_InnerUnrolling
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+  {
+    dst.copyCoeffByOuterInner(outer, Index, src);
+    assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_LinearTraversal_CompleteUnrolling
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    dst.copyCoeff(Index, src);
+    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_innervec_CompleteUnrolling
+{
+  enum {
+    outer = Index / Derived1::InnerSizeAtCompileTime,
+    inner = Index % Derived1::InnerSizeAtCompileTime,
+    JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+  };
+
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
+    assign_innervec_CompleteUnrolling<Derived1, Derived2,
+      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_innervec_InnerUnrolling
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+  {
+    dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
+    assign_innervec_InnerUnrolling<Derived1, Derived2,
+      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+template<typename Derived1, typename Derived2,
+         int Traversal = assign_traits<Derived1, Derived2>::Traversal,
+         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
+         int Version = Specialized>
+struct assign_impl;
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Derived1, typename Derived2, int Unrolling, int Version>
+struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
+{
+  static inline void run(Derived1 &, const Derived2 &) { }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      for(Index inner = 0; inner < innerSize; ++inner)
+        dst.copyCoeffByOuterInner(outer, inner, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+      ::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+        ::run(dst, src, outer);
+  }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index size = dst.size();
+    for(Index i = 0; i < size; ++i)
+      dst.copyCoeff(i, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+      ::run(dst, src);
+  }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
+    for(Index outer = 0; outer < outerSize; ++outer)
+      for(Index inner = 0; inner < innerSize; inner+=packetSize)
+        dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+      ::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+        ::run(dst, src, outer);
+  }
+};
+
+/***************************
+*** Linear vectorization ***
+***************************/
+
+template <bool IsAligned = false>
+struct unaligned_assign_impl
+{
+  template <typename Derived, typename OtherDerived>
+  static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
+};
+
+template <>
+struct unaligned_assign_impl<false>
+{
+  // MSVC must not inline this functions. If it does, it fails to optimize the
+  // packet access path.
+#ifdef _MSC_VER
+  template <typename Derived, typename OtherDerived>
+  static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
+#else
+  template <typename Derived, typename OtherDerived>
+  static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
+#endif
+  {
+    for (typename Derived::Index index = start; index < end; ++index)
+      dst.copyCoeff(index, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index size = dst.size();
+    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
+      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+    };
+    const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
+                             : internal::first_aligned(&dst.coeffRef(0), size);
+    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+
+    unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
+
+    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+    {
+      dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
+    }
+
+    unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    enum { size = Derived1::SizeAtCompileTime,
+           packetSize = packet_traits<typename Derived1::Scalar>::size,
+           alignedSize = (size/packetSize)*packetSize };
+
+    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
+    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
+  }
+};
+
+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      alignable = PacketTraits::AlignedOnScalar,
+      dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
+      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+    };
+    const Index packetAlignedMask = packetSize - 1;
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
+    Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
+                       : internal::first_aligned(&dst.coeffRef(0,0), innerSize);
+
+    for(Index outer = 0; outer < outerSize; ++outer)
+    {
+      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      // do the non-vectorizable part of the assignment
+      for(Index inner = 0; inner<alignedStart ; ++inner)
+        dst.copyCoeffByOuterInner(outer, inner, src);
+
+      // do the vectorizable part of the assignment
+      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+        dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
+
+      // do the non-vectorizable part of the assignment
+      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+        dst.copyCoeffByOuterInner(outer, inner, src);
+
+      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+    }
+  }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Part 4 : implementation of DenseBase methods
+***************************************************************************/
+
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
@@ -27,61 +492,89 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
  EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)

+#ifdef EIGEN_DEBUG_ASSIGN
+  internal::assign_traits<Derived, OtherDerived>::debug();
+#endif
  eigen_assert(rows() == other.rows() && cols() == other.cols());
-  internal::call_assignment_no_alias(derived(),other.derived());
-  
+  internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
+                                                       : int(InvalidTraversal)>::run(derived(),other.derived());
+#ifndef EIGEN_NO_DEBUG
+  checkTransposeAliasing(other.derived());
+#endif
  return derived();
 }

+namespace internal {
+
+template<typename Derived, typename OtherDerived,
+         bool EvalBeforeAssigning = (int(OtherDerived::Flags) & EvalBeforeAssigningBit) != 0,
+         bool NeedToTranspose = Derived::IsVectorAtCompileTime
+                && OtherDerived::IsVectorAtCompileTime
+                && ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
+                      |  // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+                         // revert to || as soon as not needed anymore.
+                    (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
+                && int(Derived::SizeAtCompileTime) != 1>
+struct assign_selector;
+
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,false,false> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,true,false> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,false,true> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,true,true> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
+};
+
+} // end namespace internal
+
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
-  internal::call_assignment(derived(), other.derived());
-  return derived();
+  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
 }

 template<typename Derived>
-EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
 {
-  internal::call_assignment(derived(), other.derived());
-  return derived();
+  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
 }

 template<typename Derived>
-EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
 {
-  internal::call_assignment(derived(), other.derived());
-  return derived();
+  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
 }

 template<typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
-  internal::call_assignment(derived(), other.derived());
-  return derived();
+  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
 }

 template<typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
 {
-  internal::call_assignment(derived(), other.derived());
+  other.derived().evalTo(derived());
  return derived();
 }

 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
 {
-  other.derived().evalTo(derived());
+  other.evalTo(derived());
  return derived();
 }

--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -1,828 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_ASSIGN_EVALUATOR_H
-#define EIGEN_ASSIGN_EVALUATOR_H
-
-namespace Eigen {
-
-// This implementation is based on Assign.h
-
-namespace internal {
-  
-/***************************************************************************
-* Part 1 : the logic deciding a strategy for traversal and unrolling       *
-***************************************************************************/
-
-// copy_using_evaluator_traits is based on assign_traits
-
-template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
-struct copy_using_evaluator_traits
-{
-  typedef typename DstEvaluator::XprType Dst;
-  typedef typename Dst::Scalar DstScalar;
-  // TODO distinguish between linear traversal and inner-traversals
-  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType; 
-  
-  enum {
-    DstFlags = DstEvaluator::Flags,
-    SrcFlags = SrcEvaluator::Flags,
-    RequiredAlignment = unpacket_traits<PacketType>::alignment
-  };
-  
-public:
-  enum {
-    DstAlignment = DstEvaluator::Alignment,
-    SrcAlignment = SrcEvaluator::Alignment,
-    DstHasDirectAccess = DstFlags & DirectAccessBit,
-    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
-  };
-
-private:
-  enum {
-    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
-              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
-              : int(Dst::RowsAtCompileTime),
-    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
-              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
-              : int(Dst::MaxRowsAtCompileTime),
-    MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
-    PacketSize = unpacket_traits<PacketType>::size
-  };
-
-  enum {
-    DstIsRowMajor = DstFlags&RowMajorBit,
-    SrcIsRowMajor = SrcFlags&RowMajorBit,
-    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
-    MightVectorize = StorageOrdersAgree
-                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
-                  && (functor_traits<AssignFunc>::PacketAccess),
-    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
-                       && int(JointAlignment)>=int(RequiredAlignment),
-    MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
-    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
-                       && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
-      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
-         so it's only good for large enough sizes. */
-    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
-                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
-      /* slice vectorization can be slow, so we only want it if the slices are big, which is
-         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
-         in a fixed-size matrix */
-  };
-
-public:
-  enum {
-    Traversal = int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
-              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
-              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
-              : int(MayLinearize)        ? int(LinearTraversal)
-                                         : int(DefaultTraversal),
-    Vectorized = int(Traversal) == InnerVectorizedTraversal
-              || int(Traversal) == LinearVectorizedTraversal
-              || int(Traversal) == SliceVectorizedTraversal
-  };
-
-private:
-  enum {
-    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
-    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
-                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
-                       && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
-    MayUnrollInner      = int(InnerSize) != Dynamic
-                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
-                       && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
-  };
-
-public:
-  enum {
-    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
-                ? (
-                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
-                  : int(MayUnrollInner)      ? int(InnerUnrolling)
-                                             : int(NoUnrolling)
-                  )
-              : int(Traversal) == int(LinearVectorizedTraversal)
-                ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
-                                                                                             : int(NoUnrolling) )
-              : int(Traversal) == int(LinearTraversal)
-                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
-                                              : int(NoUnrolling) )
-              : int(NoUnrolling)
-  };
-
-#ifdef EIGEN_DEBUG_ASSIGN
-  static void debug()
-  {
-    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
-    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
-    std::cerr.setf(std::ios::hex, std::ios::basefield);
-    EIGEN_DEBUG_VAR(DstFlags)
-    EIGEN_DEBUG_VAR(SrcFlags)
-    std::cerr.unsetf(std::ios::hex);
-    EIGEN_DEBUG_VAR(DstAlignment)
-    EIGEN_DEBUG_VAR(SrcAlignment)
-    EIGEN_DEBUG_VAR(RequiredAlignment)
-    EIGEN_DEBUG_VAR(JointAlignment)
-    EIGEN_DEBUG_VAR(InnerSize)
-    EIGEN_DEBUG_VAR(InnerMaxSize)
-    EIGEN_DEBUG_VAR(PacketSize)
-    EIGEN_DEBUG_VAR(StorageOrdersAgree)
-    EIGEN_DEBUG_VAR(MightVectorize)
-    EIGEN_DEBUG_VAR(MayLinearize)
-    EIGEN_DEBUG_VAR(MayInnerVectorize)
-    EIGEN_DEBUG_VAR(MayLinearVectorize)
-    EIGEN_DEBUG_VAR(MaySliceVectorize)
-    EIGEN_DEBUG_VAR(Traversal)
-    EIGEN_DEBUG_VAR(UnrollingLimit)
-    EIGEN_DEBUG_VAR(MayUnrollCompletely)
-    EIGEN_DEBUG_VAR(MayUnrollInner)
-    EIGEN_DEBUG_VAR(Unrolling)
-    std::cerr << std::endl;
-  }
-#endif
-};
-
-/***************************************************************************
-* Part 2 : meta-unrollers
-***************************************************************************/
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename Kernel, int Index, int Stop>
-struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
-{
-  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
-  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
-  typedef typename DstEvaluatorType::XprType DstXprType;
-  
-  enum {
-    outer = Index / DstXprType::InnerSizeAtCompileTime,
-    inner = Index % DstXprType::InnerSizeAtCompileTime
-  };
-
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    kernel.assignCoeffByOuterInner(outer, inner);
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
-  }
-};
-
-template<typename Kernel, int Stop>
-struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
-};
-
-template<typename Kernel, int Index_, int Stop>
-struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
-  {
-    kernel.assignCoeffByOuterInner(outer, Index_);
-    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
-  }
-};
-
-template<typename Kernel, int Stop>
-struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename Kernel, int Index, int Stop>
-struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
-  {
-    kernel.assignCoeff(Index);
-    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
-  }
-};
-
-template<typename Kernel, int Stop>
-struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename Kernel, int Index, int Stop>
-struct copy_using_evaluator_innervec_CompleteUnrolling
-{
-  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
-  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
-  typedef typename DstEvaluatorType::XprType DstXprType;
-  typedef typename Kernel::PacketType PacketType;
-  
-  enum {
-    outer = Index / DstXprType::InnerSizeAtCompileTime,
-    inner = Index % DstXprType::InnerSizeAtCompileTime,
-    JointAlignment = Kernel::AssignmentTraits::JointAlignment
-  };
-
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
-    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
-    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
-  }
-};
-
-template<typename Kernel, int Stop>
-struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
-};
-
-template<typename Kernel, int Index_, int Stop>
-struct copy_using_evaluator_innervec_InnerUnrolling
-{
-  typedef typename Kernel::PacketType PacketType;
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
-  {
-    kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
-    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
-    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
-  }
-};
-
-template<typename Kernel, int Stop>
-struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
-};
-
-/***************************************************************************
-* Part 3 : implementation of all cases
-***************************************************************************/
-
-// dense_assignment_loop is based on assign_impl
-
-template<typename Kernel,
-         int Traversal = Kernel::AssignmentTraits::Traversal,
-         int Unrolling = Kernel::AssignmentTraits::Unrolling>
-struct dense_assignment_loop;
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
-{
-  EIGEN_DEVICE_FUNC static void run(Kernel &kernel)
-  {
-    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
-      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
-        kernel.assignCoeffByOuterInner(outer, inner);
-      }
-    }
-  }
-};
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
-  }
-};
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
-{
-  typedef typename Kernel::StorageIndex StorageIndex;
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-
-    const Index outerSize = kernel.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer)
-      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
-  }
-};
-
-/***************************
-*** Linear vectorization ***
-***************************/
-
-
-// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
-// of the non vectorizable beginning and ending parts
-
-template <bool IsAligned = false>
-struct unaligned_dense_assignment_loop
-{
-  // if IsAligned = true, then do nothing
-  template <typename Kernel>
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
-};
-
-template <>
-struct unaligned_dense_assignment_loop<false>
-{
-  // MSVC must not inline this functions. If it does, it fails to optimize the
-  // packet access path.
-  // FIXME check which version exhibits this issue
-#if EIGEN_COMP_MSVC
-  template <typename Kernel>
-  static EIGEN_DONT_INLINE void run(Kernel &kernel,
-                                    Index start,
-                                    Index end)
-#else
-  template <typename Kernel>
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
-                                      Index start,
-                                      Index end)
-#endif
-  {
-    for (Index index = start; index < end; ++index)
-      kernel.assignCoeff(index);
-  }
-};
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    const Index size = kernel.size();
-    typedef typename Kernel::Scalar Scalar;
-    typedef typename Kernel::PacketType PacketType;
-    enum {
-      requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
-      packetSize = unpacket_traits<PacketType>::size,
-      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
-      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
-                                                            : int(Kernel::AssignmentTraits::DstAlignment),
-      srcAlignment = Kernel::AssignmentTraits::JointAlignment
-    };
-    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
-    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
-
-    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
-
-    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
-      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
-
-    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
-  }
-};
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
-{
-  typedef typename Kernel::StorageIndex StorageIndex;
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    
-    enum { size = DstXprType::SizeAtCompileTime,
-           packetSize = packet_traits<typename Kernel::Scalar>::size,
-           alignedSize = (size/packetSize)*packetSize };
-
-    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
-    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
-  }
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
-{
-  typedef typename Kernel::PacketType PacketType;
-  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
-  {
-    const Index innerSize = kernel.innerSize();
-    const Index outerSize = kernel.outerSize();
-    const Index packetSize = unpacket_traits<PacketType>::size;
-    for(Index outer = 0; outer < outerSize; ++outer)
-      for(Index inner = 0; inner < innerSize; inner+=packetSize)
-        kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
-  }
-};
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
-  }
-};
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
-{
-  typedef typename Kernel::StorageIndex StorageIndex;
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    const Index outerSize = kernel.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer)
-      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
-  }
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
-{
-  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
-  {
-    const Index size = kernel.size();
-    for(Index i = 0; i < size; ++i)
-      kernel.assignCoeff(i);
-  }
-};
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
-{
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
-  {
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
-  }
-};
-
-/**************************
-*** Slice vectorization ***
-***************************/
-
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
-{
-  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
-  {
-    typedef typename Kernel::Scalar Scalar;
-    typedef typename Kernel::PacketType PacketType;
-    enum {
-      packetSize = unpacket_traits<PacketType>::size,
-      requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
-      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
-      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
-      dstAlignment = alignable ? int(requestedAlignment)
-                               : int(Kernel::AssignmentTraits::DstAlignment)
-    };
-    const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
-    if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
-    {
-      // the pointer is not aligend-on scalar, so alignment is not possible
-      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
-    }
-    const Index packetAlignedMask = packetSize - 1;
-    const Index innerSize = kernel.innerSize();
-    const Index outerSize = kernel.outerSize();
-    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
-    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
-
-    for(Index outer = 0; outer < outerSize; ++outer)
-    {
-      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
-      // do the non-vectorizable part of the assignment
-      for(Index inner = 0; inner<alignedStart ; ++inner)
-        kernel.assignCoeffByOuterInner(outer, inner);
-
-      // do the vectorizable part of the assignment
-      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
-        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
-
-      // do the non-vectorizable part of the assignment
-      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
-        kernel.assignCoeffByOuterInner(outer, inner);
-
-      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
-    }
-  }
-};
-
-/***************************************************************************
-* Part 4 : Generic dense assignment kernel
-***************************************************************************/
-
-// This class generalize the assignment of a coefficient (or packet) from one dense evaluator
-// to another dense writable evaluator.
-// It is parametrized by the two evaluators, and the actual assignment functor.
-// This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
-// One can customize the assignment using this generic dense_assignment_kernel with different
-// functors, or by completely overloading it, by-passing a functor.
-template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
-class generic_dense_assignment_kernel
-{
-protected:
-  typedef typename DstEvaluatorTypeT::XprType DstXprType;
-  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
-public:
-  
-  typedef DstEvaluatorTypeT DstEvaluatorType;
-  typedef SrcEvaluatorTypeT SrcEvaluatorType;
-  typedef typename DstEvaluatorType::Scalar Scalar;
-  typedef typename DstEvaluatorType::StorageIndex StorageIndex;
-  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
-  typedef typename AssignmentTraits::PacketType PacketType;
-  
-  
-  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
-    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
-  {
-    #ifdef EIGEN_DEBUG_ASSIGN
-    AssignmentTraits::debug();
-    #endif
-  }
-  
-  EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
-  EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
-  EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
-  EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
-  EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
-  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
-  
-  // TODO get rid of this one:
-  EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; }
-  
-  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
-  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
-  
-  /// Assign src(row,col) to dst(row,col) through the assignment functor.
-  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
-  {
-    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
-  }
-  
-  /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC void assignCoeff(Index index)
-  {
-    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
-  }
-  
-  /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner)
-  {
-    Index row = rowIndexByOuterInner(outer, inner); 
-    Index col = colIndexByOuterInner(outer, inner); 
-    assignCoeff(row, col);
-  }
-  
-  
-  template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
-  {
-    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
-  }
-  
-  template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC void assignPacket(Index index)
-  {
-    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
-  }
-  
-  template<int StoreMode, int LoadMode, typename PacketType>
-  EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
-  {
-    Index row = rowIndexByOuterInner(outer, inner); 
-    Index col = colIndexByOuterInner(outer, inner);
-    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
-  }
-  
-  EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
-  {
-    typedef typename DstEvaluatorType::ExpressionTraits Traits;
-    return int(Traits::RowsAtCompileTime) == 1 ? 0
-      : int(Traits::ColsAtCompileTime) == 1 ? inner
-      : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
-      : inner;
-  }
-
-  EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner)
-  {
-    typedef typename DstEvaluatorType::ExpressionTraits Traits;
-    return int(Traits::ColsAtCompileTime) == 1 ? 0
-      : int(Traits::RowsAtCompileTime) == 1 ? inner
-      : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
-      : outer;
-  }
-  
-protected:
-  DstEvaluatorType& m_dst;
-  const SrcEvaluatorType& m_src;
-  const Functor &m_functor;
-  // TODO find a way to avoid the needs of the original expression
-  DstXprType& m_dstExpr;
-};
-
-/***************************************************************************
-* Part 5 : Entry point for dense rectangular assignment
-***************************************************************************/
-
-template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
-{
-  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-  
-  typedef evaluator<DstXprType> DstEvaluatorType;
-  typedef evaluator<SrcXprType> SrcEvaluatorType;
-
-  DstEvaluatorType dstEvaluator(dst);
-  SrcEvaluatorType srcEvaluator(src);
-    
-  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
-  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
-  
-  dense_assignment_loop<Kernel>::run(kernel);
-}
-
-template<typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
-{
-  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
-}
-
-/***************************************************************************
-* Part 6 : Generic assignment
-***************************************************************************/
-
-// Based on the respective shapes of the destination and source,
-// the class AssignmentKind determine the kind of assignment mechanism.
-// AssignmentKind must define a Kind typedef.
-template<typename DstShape, typename SrcShape> struct AssignmentKind;
-
-// Assignement kind defined in this file:
-struct Dense2Dense {};
-struct EigenBase2EigenBase {};
-
-template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
-template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
-    
-// This is the main assignment class
-template< typename DstXprType, typename SrcXprType, typename Functor,
-          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
-          typename Scalar = typename DstXprType::Scalar>
-struct Assignment;
-
-
-// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
-// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated.
-// So this intermediate function removes everything related to AssumeAliasing such that Assignment
-// does not has to bother about these annoying details.
-
-template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
-{
-  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
-}
-template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
-{
-  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
-}
-                     
-// Deal with AssumeAliasing
-template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
-{
-  typename plain_matrix_type<Src>::type tmp(src);
-  call_assignment_no_alias(dst, tmp, func);
-}
-
-template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
-{
-  call_assignment_no_alias(dst, src, func);
-}
-
-// by-pass AssumeAliasing
-// FIXME the const version should probably not be needed
-// When there is no aliasing, we require that 'dst' has been properly resized
-template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
-{
-  call_assignment_no_alias(dst.expression(), src, func);
-}
-template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
-{
-  call_assignment_no_alias(dst.expression(), src, func);
-}
-
-
-template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
-{
-  enum {
-    NeedToTranspose = (  (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
-                        |   // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
-                                // revert to || as soon as not needed anymore.
-                         (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1))
-                     && int(Dst::SizeAtCompileTime) != 1
-  };
-
-  Index dstRows = NeedToTranspose ? src.cols() : src.rows();
-  Index dstCols = NeedToTranspose ? src.rows() : src.cols();
-  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
-    dst.resize(dstRows, dstCols);
-  
-  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
-  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
-  ActualDstType actualDst(dst);
-  
-  // TODO check whether this is the right place to perform these checks:
-  EIGEN_STATIC_ASSERT_LVALUE(Dst)
-  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
-
-  // TODO this line is commented to allow matrix = permutation
-  // Actually, the "Scalar" type for a permutation matrix does not really make sense,
-  // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow micing void with anything...?
-//   EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
-  
-  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
-}
-template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
-{
-  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
-}
-
-template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
-{
-  Index dstRows = src.rows();
-  Index dstCols = src.cols();
-  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
-    dst.resize(dstRows, dstCols);
-  
-  // TODO check whether this is the right place to perform these checks:
-  EIGEN_STATIC_ASSERT_LVALUE(Dst)
-  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
-  
-  Assignment<Dst,Src,Func>::run(dst, src, func);
-}
-template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
-{
-  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
-}
-
-// forward declaration
-template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
-
-// Generic Dense to Dense assignment
-template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
-struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
-{
-  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
-  {
-    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-    
-#ifndef EIGEN_NO_DEBUG
-    internal::check_for_aliasing(dst, src);
-#endif
-    
-    call_dense_assignment_loop(dst, src, func);
-  }
-};
-
-// Generic assignment through evalTo.
-// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
-template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
-struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
-{
-  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
-  {
-    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-    src.evalTo(dst);
-  }
-};
-
-} // namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_ASSIGN_EVALUATOR_H
--- a/Eigen/src/Core/Assign_MKL.h
+++ b/Eigen/src/Core/Assign_MKL.h
@@ -1,7 +1,6 @@
 /*
 Copyright (c) 2011, Intel Corporation. All rights reserved.
- Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
- 
+
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:

@@ -38,13 +37,17 @@ namespace Eigen {

 namespace internal {

-template<typename Dst, typename Src>
+template<typename Op> struct vml_call
+{ enum { IsSupported = 0 }; };
+
+template<typename Dst, typename Src, typename UnaryOp>
 class vml_assign_traits
 {
  private:
    enum {
      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
      SrcHasDirectAccess = Src::Flags & DirectAccessBit,
+
      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
@@ -54,118 +57,165 @@ class vml_assign_traits
                    : int(Dst::MaxRowsAtCompileTime),
      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,

-      MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
+      MightEnableVml =  vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
+                     && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
-      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
+      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
+      MayEnableVml = MightEnableVml && LargeEnough,
+      MayLinearize = MayEnableVml && MightLinearize
    };
  public:
    enum {
-      EnableVml = MightEnableVml && LargeEnough,
-      Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
+      Traversal = MayLinearize ? LinearVectorizedTraversal
+                : MayEnableVml ? InnerVectorizedTraversal
+                : DefaultTraversal
    };
 };

-#define EIGEN_PP_EXPAND(ARG) ARG
+template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
+         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
+struct vml_assign_impl
+  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
+{
+};
+
+template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
+struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
+{
+  typedef typename Derived1::Scalar Scalar;
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
+  {
+    // in case we want to (or have to) skip VML at runtime we can call:
+    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer) {
+      const Scalar *src_ptr = src.IsRowMajor ?  &(src.nestedExpression().coeffRef(outer,0)) :
+                                                &(src.nestedExpression().coeffRef(0, outer));
+      Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
+      vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
+    }
+  }
+};
+
+template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
+struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
+{
+  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
+  {
+    // in case we want to (or have to) skip VML at runtime we can call:
+    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
+    vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
+  }
+};
+
+// Macroses
+
+#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
+  template<typename Derived1, typename Derived2, typename UnaryOp> \
+  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized>  {  \
+    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
+      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
+    } \
+  };
+
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
+EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
+
+
 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
-#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
+#define  EIGEN_MKL_VML_MODE VML_HA
 #else
-#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
+#define  EIGEN_MKL_VML_MODE VML_LA
 #endif

-#define EIGEN_VMLMODE_EXPAND__ 
-
-#define EIGEN_VMLMODE_PREFIX_LA vm
-#define EIGEN_VMLMODE_PREFIX__  v
-#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \
-  template< typename DstXprType, typename SrcXprNested>                                                                         \
-  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>,             \
-                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> {    \
-    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType;                                            \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) {                             \
-      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                       \
-      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) {                                              \
-        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(),                                                        \
-              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                                           \
-      } else {                                                                                                                  \
-        const Index outerSize = dst.outerSize();                                                                                \
-        for(Index outer = 0; outer < outerSize; ++outer) {                                                                      \
-          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :                             \
-                                                      &(src.nestedExpression().coeffRef(0, outer));                             \
-          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                           \
-          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr,                                                                      \
-                (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE));                                             \
-        }                                                                                                                       \
-      }                                                                                                                         \
-    }                                                                                                                           \
-  };                                                                                                                            \
-
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                         \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE)           \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)                                                         \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
-  
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)                                                              \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                               \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
-
-  
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin,   Sin,   LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin,  Asin,  LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh,  Sinh,  LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos,   Cos,   LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos,  Acos,  LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh,  Cosh,  LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan,   Tan,   LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan,  Atan,  LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh,  Tanh,  LA)
-// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,   Abs,    _)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp,   Exp,   LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log,   Ln,    LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt,  Sqrt,  _)
-
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr,   _)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg,      _)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round,  _)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor,  _)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil,  Ceil,   _)
-
-#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \
-  template< typename DstXprType, typename SrcXprNested>                                                                       \
-  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>,           \
-                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> {  \
-    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType;                                          \
-    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) {                           \
-      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                     \
-      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.functor().m_exponent);                                          \
-      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal)                                              \
-      {                                                                                                                       \
-        VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent,                                           \
-              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                                         \
-      } else {                                                                                                                \
-        const Index outerSize = dst.outerSize();                                                                              \
-        for(Index outer = 0; outer < outerSize; ++outer) {                                                                    \
-          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :                           \
-                                                      &(src.nestedExpression().coeffRef(0, outer));                           \
-          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                         \
-          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent,                                                          \
-                 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE));                                          \
-        }                                                                                                                     \
-      }                                                                                                                       \
-    }                                                                                                                         \
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
+  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
+    enum { IsSupported = 1 };                                                    \
+    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,        \
+                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
+      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                           \
+    }                                                                            \
  };
-  
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float,    float,         LA)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double,   double,        LA)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8,  LA)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
+  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
+    enum { IsSupported = 1 };                                                    \
+    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,        \
+                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
+      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
+      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                  \
+    }                                                                            \
+  };
+
+#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)       \
+  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
+    enum { IsSupported = 1 };                                                    \
+    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
+                          int size, const EIGENTYPE* src, EIGENTYPE* dst) {      \
+      EIGENTYPE exponent = func.m_exponent;                                      \
+      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
+      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,               \
+                        (VMLTYPE*)dst, &vmlMode);                                \
+    }                                                                            \
+  };
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                   \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)             \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)   \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                        \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                         \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
+
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)         \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)             \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8)  \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                     \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                      \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
+
+
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin,  Sin)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos,  Cos)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan,  Tan)
+//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,  Abs)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp,  Exp)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log,  Ln)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
+
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
+
+// The vm*powx functions are not avaibale in the windows version of MKL.
+#ifdef _WIN32
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
+#endif

 } // end namespace internal

--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -32,7 +32,7 @@ class BandMatrixBase : public EigenBase<Derived>
    };
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
-    typedef typename DenseMatrixType::StorageIndex StorageIndex;
+    typedef typename DenseMatrixType::Index Index;
    typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
    typedef EigenBase<Derived> Base;

@@ -179,7 +179,7 @@ struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
 {
  typedef _Scalar Scalar;
  typedef Dense StorageKind;
-  typedef Eigen::Index StorageIndex;
+  typedef DenseIndex Index;
  enum {
    CoeffReadCost = NumTraits<Scalar>::ReadCost,
    RowsAtCompileTime = _Rows,
@@ -201,10 +201,10 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Sub
  public:

    typedef typename internal::traits<BandMatrix>::Scalar Scalar;
-    typedef typename internal::traits<BandMatrix>::StorageIndex StorageIndex;
+    typedef typename internal::traits<BandMatrix>::Index Index;
    typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;

-    explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
+    inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
      : m_coeffs(1+supers+subs,cols),
        m_rows(rows), m_supers(supers), m_subs(subs)
    {
@@ -241,7 +241,7 @@ struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Opt
 {
  typedef typename _CoefficientsType::Scalar Scalar;
  typedef typename _CoefficientsType::StorageKind StorageKind;
-  typedef typename _CoefficientsType::StorageIndex StorageIndex;
+  typedef typename _CoefficientsType::Index Index;
  enum {
    CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
    RowsAtCompileTime = _Rows,
@@ -264,9 +264,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT

    typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
    typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
-    typedef typename internal::traits<BandMatrixWrapper>::StorageIndex StorageIndex;
+    typedef typename internal::traits<BandMatrixWrapper>::Index Index;

-    explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
+    inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
      : m_coeffs(coeffs),
        m_rows(rows), m_supers(supers), m_subs(subs)
    {
@@ -312,9 +312,9 @@ template<typename Scalar, int Size, int Options>
 class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
 {
    typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
-    typedef typename Base::StorageIndex StorageIndex;
+    typedef typename Base::Index Index;
  public:
-    explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
+    TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}

    inline typename Base::template DiagonalIntReturnType<1>::Type super()
    { return Base::template diagonal<1>(); }
@@ -327,25 +327,6 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint
  protected:
 };

-
-struct BandShape {};
-
-template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
-struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
-  : public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
-{
-  typedef BandShape Shape;
-};
-
-template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
-struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
-  : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
-{
-  typedef BandShape Shape;
-};
-
-template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };
-
 } // end namespace internal

 } // end namespace Eigen
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -21,9 +21,7 @@ namespace Eigen {
  * \param XprType the type of the expression in which we are taking a block
  * \param BlockRows the number of rows of the block we are taking at compile time (optional)
  * \param BlockCols the number of columns of the block we are taking at compile time (optional)
-  * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or
-  *        to set of columns of a column major matrix (optional). The parameter allows to determine
-  *        at compile time whether aligned access is possible on the block expression.
+  * \param _DirectAccessStatus \internal used for partial specialization
  *
  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
@@ -49,13 +47,13 @@ namespace Eigen {
  */

 namespace internal {
-template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
-struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
+struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : traits<XprType>
 {
  typedef typename traits<XprType>::Scalar Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::XprKind XprKind;
-  typedef typename ref_selector<XprType>::type XprTypeNested;
+  typedef typename nested<XprType>::type XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  enum{
    MatrixRows = traits<XprType>::RowsAtCompileTime,
@@ -68,7 +66,6 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
    MaxColsAtCompileTime = BlockCols==0 ? 0
                         : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
                         : int(traits<XprType>::MaxColsAtCompileTime),
-
    XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
    IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
               : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -81,111 +78,35 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
    OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
                             ? int(outer_stride_at_compile_time<XprType>::ret)
                             : int(inner_stride_at_compile_time<XprType>::ret),
-
-    // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
+    MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
+                       && (InnerStrideAtCompileTime == 1)
+                        ? PacketAccessBit : 0,
+    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
+    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
-    Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
-    // FIXME DirectAccessBit should not be handled by expressions
-    // 
-    // Alignment is needed by MapBase's assertions
-    // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
-    Alignment = 0
+    Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
+                                        DirectAccessBit |
+                                        MaskPacketAccessBit |
+                                        MaskAlignedBit),
+    Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
  };
 };
+}

-template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
-         bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
-         
-} // end namespace internal
-
-template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
-
-template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
-  : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block
+  : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
 {
-    typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;
-  public:
-    //typedef typename Impl::Base Base;
-    typedef Impl Base;
-    EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
-    
-    typedef typename internal::remove_all<XprType>::type NestedExpression;
-  
-    /** Column or Row constructor
-      */
-    EIGEN_DEVICE_FUNC
-    inline Block(XprType& xpr, Index i) : Impl(xpr,i)
-    {
-      eigen_assert( (i>=0) && (
-          ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
-        ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
-    }
-
-    /** Fixed-size constructor
-      */
-    EIGEN_DEVICE_FUNC
-    inline Block(XprType& xpr, Index startRow, Index startCol)
-      : Impl(xpr, startRow, startCol)
-    {
-      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
-      eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
-             && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
-    }
-
-    /** Dynamic-size constructor
-      */
-    EIGEN_DEVICE_FUNC
-    inline Block(XprType& xpr,
-          Index startRow, Index startCol,
-          Index blockRows, Index blockCols)
-      : Impl(xpr, startRow, startCol, blockRows, blockCols)
-    {
-      eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
-          && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
-      eigen_assert(startRow >= 0 && blockRows >= 0 && startRow  <= xpr.rows() - blockRows
-          && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
-    }
-};
-         
-// The generic default implementation for dense block simplu forward to the internal::BlockImpl_dense
-// that must be specialized for direct and non-direct access...
-template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
-class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
-  : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
-{
-    typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
-    typedef typename XprType::StorageIndex StorageIndex;
-  public:
-    typedef Impl Base;
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
-    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
-    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
-      : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
-};
-
-namespace internal {
-
-/** \internal Internal implementation of dense Blocks in the general case. */
-template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense
-  : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
-{
-    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
  public:

-    typedef typename internal::dense_xpr_base<BlockType>::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+    typedef typename internal::dense_xpr_base<Block>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Block)

-    // class InnerIterator; // FIXME apparently never used
+    class InnerIterator;

    /** Column or Row constructor
      */
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl_dense(XprType& xpr, Index i)
+    inline Block(XprType& xpr, Index i)
      : m_xpr(xpr),
        // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
        // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,
@@ -195,51 +116,60 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
        m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
        m_blockCols(BlockCols==1 ? 1 : xpr.cols())
-    {}
+    {
+      eigen_assert( (i>=0) && (
+          ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
+        ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
+    }

    /** Fixed-size constructor
      */
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+    inline Block(XprType& xpr, Index startRow, Index startCol)
      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
-                    m_blockRows(BlockRows), m_blockCols(BlockCols)
-    {}
+        m_blockRows(BlockRows), m_blockCols(BlockCols)
+    {
+      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
+      eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
+             && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
+    }

    /** Dynamic-size constructor
      */
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl_dense(XprType& xpr,
+    inline Block(XprType& xpr,
          Index startRow, Index startCol,
          Index blockRows, Index blockCols)
      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
-                    m_blockRows(blockRows), m_blockCols(blockCols)
-    {}
+                          m_blockRows(blockRows), m_blockCols(blockCols)
+    {
+      eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
+          && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
+      eigen_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
+          && startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols());
+    }

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)

-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index rowId, Index colId)
+    inline Index rows() const { return m_blockRows.value(); }
+    inline Index cols() const { return m_blockCols.value(); }
+
+    inline Scalar& coeffRef(Index row, Index col)
    {
      EIGEN_STATIC_ASSERT_LVALUE(XprType)
      return m_xpr.const_cast_derived()
-               .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+               .coeffRef(row + m_startRow.value(), col + m_startCol.value());
    }

-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    inline const Scalar& coeffRef(Index row, Index col) const
    {
      return m_xpr.derived()
-               .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+               .coeffRef(row + m_startRow.value(), col + m_startCol.value());
    }

-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
+    EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
    {
-      return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
+      return m_xpr.coeff(row + m_startRow.value(), col + m_startCol.value());
    }

-    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(XprType)
@@ -248,7 +178,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
                       m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
    }

-    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
      return m_xpr.const_cast_derived()
@@ -256,7 +185,6 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
                       m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
    }

-    EIGEN_DEVICE_FUNC
    inline const CoeffReturnType coeff(Index index) const
    {
      return m_xpr
@@ -265,17 +193,17 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
    }

    template<int LoadMode>
-    inline PacketScalar packet(Index rowId, Index colId) const
+    inline PacketScalar packet(Index row, Index col) const
    {
      return m_xpr.template packet<Unaligned>
-              (rowId + m_startRow.value(), colId + m_startCol.value());
+              (row + m_startRow.value(), col + m_startCol.value());
    }

    template<int LoadMode>
-    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
    {
      m_xpr.const_cast_derived().template writePacket<Unaligned>
-              (rowId + m_startRow.value(), colId + m_startCol.value(), val);
+              (row + m_startRow.value(), col + m_startCol.value(), x);
    }

    template<int LoadMode>
@@ -287,34 +215,31 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
    }

    template<int LoadMode>
-    inline void writePacket(Index index, const PacketScalar& val)
+    inline void writePacket(Index index, const PacketScalar& x)
    {
      m_xpr.const_cast_derived().template writePacket<Unaligned>
         (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-          m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
+          m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), x);
    }

    #ifdef EIGEN_PARSED_BY_DOXYGEN
    /** \sa MapBase::data() */
-    EIGEN_DEVICE_FUNC inline const Scalar* data() const;
-    EIGEN_DEVICE_FUNC inline Index innerStride() const;
-    EIGEN_DEVICE_FUNC inline Index outerStride() const;
+    inline const Scalar* data() const;
+    inline Index innerStride() const;
+    inline Index outerStride() const;
    #endif

-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
+    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
    { 
      return m_xpr; 
    }
      
-    EIGEN_DEVICE_FUNC
-    StorageIndex startRow() const
+    Index startRow() const 
    { 
      return m_startRow.value(); 
    }
      
-    EIGEN_DEVICE_FUNC
-    StorageIndex startCol() const
+    Index startCol() const 
    { 
      return m_startCol.value(); 
    }
@@ -322,79 +247,79 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
  protected:

    const typename XprType::Nested m_xpr;
-    const internal::variable_if_dynamic<StorageIndex, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
-    const internal::variable_if_dynamic<StorageIndex, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
-    const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
-    const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
+    const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
+    const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
+    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
+    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
 };

-/** \internal Internal implementation of dense Blocks in the direct access case.*/
+/** \internal */
 template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
-class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
-  : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
+class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
+  : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel, true> >
 {
-    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
-    enum {
-      XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
-    };
  public:

-    typedef MapBase<BlockType> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+    typedef MapBase<Block> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Block)
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)

    /** Column or Row constructor
      */
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl_dense(XprType& xpr, Index i)
-      : Base(xpr.data() + i * (    ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) 
-                                || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
+    inline Block(XprType& xpr, Index i)
+      : Base(internal::const_cast_ptr(&xpr.coeffRef(
+              (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
+              (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
             BlockRows==1 ? 1 : xpr.rows(),
             BlockCols==1 ? 1 : xpr.cols()),
        m_xpr(xpr)
    {
+      eigen_assert( (i>=0) && (
+          ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
+        ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
      init();
    }

    /** Fixed-size constructor
      */
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
-      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
-        m_xpr(xpr)
+    inline Block(XprType& xpr, Index startRow, Index startCol)
+      : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
    {
+      eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
+             && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
      init();
    }

    /** Dynamic-size constructor
      */
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl_dense(XprType& xpr,
+    inline Block(XprType& xpr,
          Index startRow, Index startCol,
          Index blockRows, Index blockCols)
-      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
+      : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols),
        m_xpr(xpr)
    {
+      eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
+             && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
+      eigen_assert(startRow >= 0 && blockRows >= 0 && startRow + blockRows <= xpr.rows()
+             && startCol >= 0 && blockCols >= 0 && startCol + blockCols <= xpr.cols());
      init();
    }

-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
+    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
    { 
      return m_xpr; 
    }
      
    /** \sa MapBase::innerStride() */
-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
-      return internal::traits<BlockType>::HasSameStorageOrderAsXprType
+      return internal::traits<Block>::HasSameStorageOrderAsXprType
             ? m_xpr.innerStride()
             : m_xpr.outerStride();
    }

    /** \sa MapBase::outerStride() */
-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return m_outerStride;
@@ -408,8 +333,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal used by allowAligned() */
-    EIGEN_DEVICE_FUNC
-    inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
+    inline Block(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
      : Base(data, blockRows, blockCols), m_xpr(xpr)
    {
      init();
@@ -417,10 +341,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    #endif

  protected:
-    EIGEN_DEVICE_FUNC
    void init()
    {
-      m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
+      m_outerStride = internal::traits<Block>::HasSameStorageOrderAsXprType
                    ? m_xpr.outerStride()
                    : m_xpr.innerStride();
    }
@@ -429,8 +352,6 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    Index m_outerStride;
 };

-} // end namespace internal
-
 } // end namespace Eigen

 #endif // EIGEN_BLOCK_H
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -17,10 +17,9 @@ namespace internal {
 template<typename Derived, int UnrollCount>
 struct all_unroller
 {
-  typedef typename Derived::ExpressionTraits Traits;
  enum {
-    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
-    row = (UnrollCount-1) % Traits::RowsAtCompileTime
+    col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+    row = (UnrollCount-1) % Derived::RowsAtCompileTime
  };

  static inline bool run(const Derived &mat)
@@ -30,9 +29,9 @@ struct all_unroller
 };

 template<typename Derived>
-struct all_unroller<Derived, 0>
+struct all_unroller<Derived, 1>
 {
-  static inline bool run(const Derived &/*mat*/) { return true; }
+  static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
 };

 template<typename Derived>
@@ -44,12 +43,11 @@ struct all_unroller<Derived, Dynamic>
 template<typename Derived, int UnrollCount>
 struct any_unroller
 {
-  typedef typename Derived::ExpressionTraits Traits;
  enum {
-    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
-    row = (UnrollCount-1) % Traits::RowsAtCompileTime
+    col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+    row = (UnrollCount-1) % Derived::RowsAtCompileTime
  };
-  
+
  static inline bool run(const Derived &mat)
  {
    return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
@@ -57,9 +55,9 @@ struct any_unroller
 };

 template<typename Derived>
-struct any_unroller<Derived, 0>
+struct any_unroller<Derived, 1>
 {
-  static inline bool run(const Derived & /*mat*/) { return false; }
+  static inline bool run(const Derived &mat) { return mat.coeff(0, 0); }
 };

 template<typename Derived>
@@ -80,21 +78,21 @@ struct any_unroller<Derived, Dynamic>
 template<typename Derived>
 inline bool DenseBase<Derived>::all() const
 {
-  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && Evaluator::CoeffReadCost != Dynamic
+          && CoeffReadCost != Dynamic
          && NumTraits<Scalar>::AddCost != Dynamic
-          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+          && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
-  Evaluator evaluator(derived());
  if(unroll)
-    return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
+    return internal::all_unroller<Derived,
+                           unroll ? int(SizeAtCompileTime) : Dynamic
+     >::run(derived());
  else
  {
    for(Index j = 0; j < cols(); ++j)
      for(Index i = 0; i < rows(); ++i)
-        if (!evaluator.coeff(i, j)) return false;
+        if (!coeff(i, j)) return false;
    return true;
  }
 }
@@ -106,21 +104,21 @@ inline bool DenseBase<Derived>::all() const
 template<typename Derived>
 inline bool DenseBase<Derived>::any() const
 {
-  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
-          && Evaluator::CoeffReadCost != Dynamic
+          && CoeffReadCost != Dynamic
          && NumTraits<Scalar>::AddCost != Dynamic
-          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+          && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
-  Evaluator evaluator(derived());
  if(unroll)
-    return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
+    return internal::any_unroller<Derived,
+                           unroll ? int(SizeAtCompileTime) : Dynamic
+           >::run(derived());
  else
  {
    for(Index j = 0; j < cols(); ++j)
      for(Index i = 0; i < rows(); ++i)
-        if (evaluator.coeff(i, j)) return true;
+        if (coeff(i, j)) return true;
    return false;
  }
 }
@@ -130,31 +128,11 @@ inline bool DenseBase<Derived>::any() const
  * \sa all(), any()
  */
 template<typename Derived>
-inline Eigen::Index DenseBase<Derived>::count() const
+inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
 {
  return derived().template cast<bool>().template cast<Index>().sum();
 }

-/** \returns true is \c *this contains at least one Not A Number (NaN).
-  *
-  * \sa allFinite()
-  */
-template<typename Derived>
-inline bool DenseBase<Derived>::hasNaN() const
-{
-  return !((derived().array()==derived().array()).all());
-}
-
-/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
-  *
-  * \sa hasNaN()
-  */
-template<typename Derived>
-inline bool DenseBase<Derived>::allFinite() const
-{
-  return !((derived()-derived()).hasNaN());
-}
-    
 } // end namespace Eigen

 #endif // EIGEN_ALLANDANY_H
--- a/Eigen/src/Core/CMakeLists.txt
+++ b/Eigen/src/Core/CMakeLists.txt
@@ -8,4 +8,3 @@ INSTALL(FILES
 ADD_SUBDIRECTORY(products)
 ADD_SUBDIRECTORY(util)
 ADD_SUBDIRECTORY(arch)
-ADD_SUBDIRECTORY(functors)
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -28,8 +28,8 @@ template<typename XprType>
 struct CommaInitializer
 {
  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::Index Index;

-  EIGEN_DEVICE_FUNC
  inline CommaInitializer(XprType& xpr, const Scalar& s)
    : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
  {
@@ -37,27 +37,13 @@ struct CommaInitializer
  }

  template<typename OtherDerived>
-  EIGEN_DEVICE_FUNC
  inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
    : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
  {
    m_xpr.block(0, 0, other.rows(), other.cols()) = other;
  }

-  /* Copy/Move constructor which transfers ownership. This is crucial in 
-   * absence of return value optimization to avoid assertions during destruction. */
-  // FIXME in C++11 mode this could be replaced by a proper RValue constructor
-  EIGEN_DEVICE_FUNC
-  inline CommaInitializer(const CommaInitializer& o)
-  : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
-    // Mark original object as finished. In absence of R-value references we need to const_cast:
-    const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
-    const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
-    const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;
-  }
-
  /* inserts a scalar value in the target matrix */
-  EIGEN_DEVICE_FUNC
  CommaInitializer& operator,(const Scalar& s)
  {
    if (m_col==m_xpr.cols())
@@ -77,7 +63,6 @@ struct CommaInitializer

  /* inserts a matrix expression in the target matrix */
  template<typename OtherDerived>
-  EIGEN_DEVICE_FUNC
  CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
  {
    if(other.cols()==0 || other.rows()==0)
@@ -103,11 +88,7 @@ struct CommaInitializer
    return *this;
  }

-  EIGEN_DEVICE_FUNC
  inline ~CommaInitializer()
-#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
-  throw(Eigen::eigen_assert_exception)
-#endif
  {
    eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
         && m_col == m_xpr.cols()
@@ -121,10 +102,9 @@ struct CommaInitializer
    * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
    * \endcode
    */
-  EIGEN_DEVICE_FUNC
  inline XprType& finished() { return m_xpr; }

-  XprType& m_xpr;           // target expression
+  XprType& m_xpr;   // target expression
  Index m_row;              // current row id
  Index m_col;              // current col id
  Index m_currentBlockRows; // current block height
@@ -138,8 +118,6 @@ struct CommaInitializer
  *
  * Example: \include MatrixBase_set.cpp
  * Output: \verbinclude MatrixBase_set.out
-  * 
-  * \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order.
  *
  * \sa CommaInitializer::finished(), class CommaInitializer
  */
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
--- a/Eigen/src/Core/CoreIterators.h
+++ b/Eigen/src/Core/CoreIterators.h
@@ -1,127 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_COREITERATORS_H
-#define EIGEN_COREITERATORS_H
-
-namespace Eigen { 
-
-/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core
- */
-
-namespace internal {
-
-template<typename XprType, typename EvaluatorKind>
-class inner_iterator_selector;
-
-}
-
-/** \class InnerIterator
-  * \brief An InnerIterator allows to loop over the element of any matrix expression.
-  * 
-  * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed.
-  * 
-  * TODO: add a usage example
-  */
-template<typename XprType>
-class InnerIterator
-{
-protected:
-  typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
-  typedef internal::evaluator<XprType> EvaluatorType;
-  typedef typename internal::traits<XprType>::Scalar Scalar;
-public:
-  /** Construct an iterator over the \a outerId -th row or column of \a xpr */
-  InnerIterator(const XprType &xpr, const Index &outerId)
-    : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize())
-  {}
-  
-  /// \returns the value of the current coefficient.
-  EIGEN_STRONG_INLINE Scalar value() const          { return m_iter.value(); }
-  /** Increment the iterator \c *this to the next non-zero coefficient.
-    * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView
-    */
-  EIGEN_STRONG_INLINE InnerIterator& operator++()   { m_iter.operator++(); return *this; }
-  /// \returns the column or row index of the current coefficient.
-  EIGEN_STRONG_INLINE Index index() const           { return m_iter.index(); }
-  /// \returns the row index of the current coefficient.
-  EIGEN_STRONG_INLINE Index row() const             { return m_iter.row(); }
-  /// \returns the column index of the current coefficient.
-  EIGEN_STRONG_INLINE Index col() const             { return m_iter.col(); }
-  /// \returns \c true if the iterator \c *this still references a valid coefficient.
-  EIGEN_STRONG_INLINE operator bool() const         { return m_iter; }
-  
-protected:
-  EvaluatorType m_eval;
-  IteratorType m_iter;
-private:
-  // If you get here, then you're not using the right InnerIterator type, e.g.:
-  //   SparseMatrix<double,RowMajor> A;
-  //   SparseMatrix<double>::InnerIterator it(A,0);
-  template<typename T> InnerIterator(const EigenBase<T>&,Index outer);
-};
-
-namespace internal {
-
-// Generic inner iterator implementation for dense objects
-template<typename XprType>
-class inner_iterator_selector<XprType, IndexBased>
-{
-protected:
-  typedef evaluator<XprType> EvaluatorType;
-  typedef typename traits<XprType>::Scalar Scalar;
-  enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
-  
-public:
-  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize)
-    : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize)
-  {}
-
-  EIGEN_STRONG_INLINE Scalar value() const
-  {
-    return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner)
-                        : m_eval.coeff(m_inner, m_outer);
-  }
-
-  EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; }
-
-  EIGEN_STRONG_INLINE Index index() const { return m_inner; }
-  inline Index row() const { return IsRowMajor ? m_outer : index(); }
-  inline Index col() const { return IsRowMajor ? index() : m_outer; }
-
-  EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
-
-protected:
-  const EvaluatorType& m_eval;
-  Index m_inner;
-  const Index m_outer;
-  const Index m_end;
-};
-
-// For iterator-based evaluator, inner-iterator is already implemented as
-// evaluator<>::InnerIterator
-template<typename XprType>
-class inner_iterator_selector<XprType, IteratorBased>
- : public evaluator<XprType>::InnerIterator
-{
-protected:
-  typedef typename evaluator<XprType>::InnerIterator Base;
-  typedef evaluator<XprType> EvaluatorType;
-  
-public:
-  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)
-    : Base(eval, outerId)
-  {}  
-};
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_COREITERATORS_H
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -56,61 +56,81 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
                       typename Rhs::Scalar
                     )
                   >::type Scalar;
-  typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
-                                              typename traits<Rhs>::StorageKind,
-                                              BinaryOp>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,
-                                      typename traits<Rhs>::StorageIndex>::type StorageIndex;
+  typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
+                                           typename traits<Rhs>::StorageKind>::ret StorageKind;
+  typedef typename promote_index_type<typename traits<Lhs>::Index,
+                                         typename traits<Rhs>::Index>::type Index;
  typedef typename Lhs::Nested LhsNested;
  typedef typename Rhs::Nested RhsNested;
  typedef typename remove_reference<LhsNested>::type _LhsNested;
  typedef typename remove_reference<RhsNested>::type _RhsNested;
  enum {
-    Flags = _LhsNested::Flags & RowMajorBit
+    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
+    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
+    LhsFlags = _LhsNested::Flags,
+    RhsFlags = _RhsNested::Flags,
+    SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
+    StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
+    Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
+        HereditaryBits
+      | (int(LhsFlags) & int(RhsFlags) &
+           ( AlignedBit
+           | (StorageOrdersAgree ? LinearAccessBit : 0)
+           | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+           )
+        )
+     ),
+    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
+    CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
  };
 };
 } // end namespace internal

+// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
+// that would take two operands of different types. If there were such an example, then this check should be
+// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
+// currently they take only one typename Scalar template parameter.
+// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
+// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
+// add together a float matrix and a double matrix.
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+  EIGEN_STATIC_ASSERT((internal::functor_allows_mixing_real_and_complex<BINOP>::ret \
+                        ? int(internal::is_same<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::value) \
+                        : int(internal::is_same<LHS, RHS>::value)), \
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
 template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
 class CwiseBinaryOpImpl;

-template<typename BinaryOp, typename LhsType, typename RhsType>
-class CwiseBinaryOp : 
+template<typename BinaryOp, typename Lhs, typename Rhs>
+class CwiseBinaryOp : internal::no_assignment_operator,
  public CwiseBinaryOpImpl<
-          BinaryOp, LhsType, RhsType,
-          typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
-                                                        typename internal::traits<RhsType>::StorageKind,
-                                                        BinaryOp>::ret>,
-  internal::no_assignment_operator
+          BinaryOp, Lhs, Rhs,
+          typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+                                           typename internal::traits<Rhs>::StorageKind>::ret>
 {
  public:
-    
-    typedef typename internal::remove_all<LhsType>::type Lhs;
-    typedef typename internal::remove_all<RhsType>::type Rhs;

    typedef typename CwiseBinaryOpImpl<
-        BinaryOp, LhsType, RhsType,
-        typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
-                                                      typename internal::traits<Rhs>::StorageKind,
-                                                      BinaryOp>::ret>::Base Base;
+        BinaryOp, Lhs, Rhs,
+        typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+                                         typename internal::traits<Rhs>::StorageKind>::ret>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)

-    typedef typename internal::ref_selector<LhsType>::type LhsNested;
-    typedef typename internal::ref_selector<RhsType>::type RhsNested;
+    typedef typename internal::nested<Lhs>::type LhsNested;
+    typedef typename internal::nested<Rhs>::type RhsNested;
    typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
    typedef typename internal::remove_reference<RhsNested>::type _RhsNested;

-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
-      : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
+    EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
+      : m_lhs(lhs), m_rhs(rhs), m_functor(func)
    {
      EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
      // require the sizes to match
      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
-      eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
+      eigen_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rows() const {
      // return the fixed size type if available to enable compile time optimizations
      if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
@@ -118,7 +138,6 @@ class CwiseBinaryOp :
      else
        return m_lhs.rows();
    }
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index cols() const {
      // return the fixed size type if available to enable compile time optimizations
      if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
@@ -128,13 +147,10 @@ class CwiseBinaryOp :
    }

    /** \returns the left hand side nested expression */
-    EIGEN_DEVICE_FUNC
    const _LhsNested& lhs() const { return m_lhs; }
    /** \returns the right hand side nested expression */
-    EIGEN_DEVICE_FUNC
    const _RhsNested& rhs() const { return m_rhs; }
    /** \returns the functor representing the binary operation */
-    EIGEN_DEVICE_FUNC
    const BinaryOp& functor() const { return m_functor; }

  protected:
@@ -143,13 +159,41 @@ class CwiseBinaryOp :
    const BinaryOp m_functor;
 };

-// Generic API dispatcher
-template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
-class CwiseBinaryOpImpl
-  : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+template<typename BinaryOp, typename Lhs, typename Rhs>
+class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
+  : public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
 {
-public:
-  typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
+    typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
+  public:
+
+    typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
+
+    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+    {
+      return derived().functor()(derived().lhs().coeff(row, col),
+                                 derived().rhs().coeff(row, col));
+    }
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+    {
+      return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(row, col),
+                                          derived().rhs().template packet<LoadMode>(row, col));
+    }
+
+    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+    {
+      return derived().functor()(derived().lhs().coeff(index),
+                                 derived().rhs().coeff(index));
+    }
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+    {
+      return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(index),
+                                          derived().rhs().template packet<LoadMode>(index));
+    }
 };

 /** replaces \c *this by \c *this - \a other.
@@ -161,7 +205,8 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
 {
-  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
+  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  tmp = other.derived();
  return derived();
 }

@@ -174,11 +219,11 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
 {
-  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
+  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
+  tmp = other.derived();
  return derived();
 }

 } // end namespace Eigen

 #endif // EIGEN_CWISE_BINARY_OP_H
-
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -35,20 +35,25 @@ template<typename NullaryOp, typename PlainObjectType>
 struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
 {
  enum {
-    Flags = traits<PlainObjectType>::Flags & RowMajorBit
+    Flags = (traits<PlainObjectType>::Flags
+      & (  HereditaryBits
+         | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
+         | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
+      | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
+    CoeffReadCost = functor_traits<NullaryOp>::Cost
  };
 };
 }

 template<typename NullaryOp, typename PlainObjectType>
-class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
+class CwiseNullaryOp : internal::no_assignment_operator,
+  public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type
 {
  public:

    typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)

-    EIGEN_DEVICE_FUNC
    CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
      : m_rows(rows), m_cols(cols), m_functor(func)
    {
@@ -58,24 +63,20 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
            && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }

-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
+    EIGEN_STRONG_INLINE const Scalar coeff(Index rows, Index cols) const
    {
-      return m_functor(rowId, colId);
+      return m_functor(rows, cols);
    }

    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
    {
-      return m_functor.packetOp(rowId, colId);
+      return m_functor.packetOp(row, col);
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      return m_functor(index);
@@ -88,7 +89,6 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
    }

    /** \returns the functor representing the nullary operation */
-    EIGEN_DEVICE_FUNC
    const NullaryOp& functor() const { return m_functor; }

  protected:
@@ -113,10 +113,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
 DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
 {
-  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
+  return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
 }

 /** \returns an expression of a matrix defined by a custom functor \a func
@@ -132,19 +132,16 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
  *
  * The template parameter \a CustomNullaryOp is the type of the functor.
  *
-  * Here is an example with C++11 random generators: \include random_cpp11.cpp
-  * Output: \verbinclude random_cpp11.out
-  * 
  * \sa class CwiseNullaryOp
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
 DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
-  else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
+  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
+  else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
 }

 /** \returns an expression of a matrix defined by a custom functor \a func
@@ -158,10 +155,10 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
 DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
 {
-  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
+  return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
 }

 /** \returns an expression of a constant matrix of value \a value
@@ -245,7 +242,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturn
 DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
+  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
 }

 /**
@@ -258,7 +255,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
-  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
+  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,false>(low,high,Derived::SizeAtCompileTime));
 }

 /**
@@ -279,7 +276,7 @@ EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedRetu
 DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
+  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,true>(low,high,size));
 }

 /**
@@ -292,18 +289,17 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
-  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
+  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,true>(low,high,Derived::SizeAtCompileTime));
 }

-/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
+/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
 template<typename Derived>
 bool DenseBase<Derived>::isApproxToConstant
-(const Scalar& val, const RealScalar& prec) const
+(const Scalar& value, RealScalar prec) const
 {
-  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index j = 0; j < cols(); ++j)
    for(Index i = 0; i < rows(); ++i)
-      if(!internal::isApprox(self.coeff(i, j), val, prec))
+      if(!internal::isApprox(this->coeff(i, j), value, prec))
        return false;
  return true;
 }
@@ -313,19 +309,19 @@ bool DenseBase<Derived>::isApproxToConstant
  * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
 template<typename Derived>
 bool DenseBase<Derived>::isConstant
-(const Scalar& val, const RealScalar& prec) const
+(const Scalar& value, RealScalar prec) const
 {
-  return isApproxToConstant(val, prec);
+  return isApproxToConstant(value, prec);
 }

-/** Alias for setConstant(): sets all coefficients in this expression to \a val.
+/** Alias for setConstant(): sets all coefficients in this expression to \a value.
  *
  * \sa setConstant(), Constant(), class CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
+EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& value)
 {
-  setConstant(val);
+  setConstant(value);
 }

 /** Sets all coefficients in this expression to \a value.
@@ -333,9 +329,9 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
  * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& value)
 {
-  return derived() = Constant(rows(), cols(), val);
+  return derived() = Constant(rows(), cols(), value);
 }

 /** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value.
@@ -349,17 +345,17 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
+PlainObjectBase<Derived>::setConstant(Index size, const Scalar& value)
 {
  resize(size);
-  return setConstant(val);
+  return setConstant(value);
 }

 /** Resizes to the given size, and sets all coefficients in this expression to the given \a value.
  *
  * \param rows the new number of rows
  * \param cols the new number of columns
-  * \param val the value to which all coefficients are set
+  * \param value the value to which all coefficients are set
  *
  * Example: \include Matrix_setConstant_int_int.cpp
  * Output: \verbinclude Matrix_setConstant_int_int.out
@@ -368,10 +364,10 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
+PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& value)
 {
  resize(rows, cols);
-  return setConstant(val);
+  return setConstant(value);
 }

 /**
@@ -388,10 +384,10 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
  * \sa CwiseNullaryOp
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index size, const Scalar& low, const Scalar& high)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
+  return derived() = Derived::NullaryExpr(size, internal::linspaced_op<Scalar,false>(low,high,size));
 }

 /**
@@ -483,12 +479,11 @@ DenseBase<Derived>::Zero()
  * \sa class CwiseNullaryOp, Zero()
  */
 template<typename Derived>
-bool DenseBase<Derived>::isZero(const RealScalar& prec) const
+bool DenseBase<Derived>::isZero(RealScalar prec) const
 {
-  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index j = 0; j < cols(); ++j)
    for(Index i = 0; i < rows(); ++i)
-      if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
+      if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
        return false;
  return true;
 }
@@ -517,9 +512,9 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setZero(Index newSize)
+PlainObjectBase<Derived>::setZero(Index size)
 {
-  resize(newSize);
+  resize(size);
  return setConstant(Scalar(0));
 }

@@ -566,7 +561,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols)

 /** \returns an expression of a vector where all coefficients equal one.
  *
-  * The parameter \a newSize is the size of the returned vector.
+  * The parameter \a size is the size of the returned vector.
  * Must be compatible with this MatrixBase type.
  *
  * \only_for_vectors
@@ -582,9 +577,9 @@ DenseBase<Derived>::Ones(Index rows, Index cols)
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
-DenseBase<Derived>::Ones(Index newSize)
+DenseBase<Derived>::Ones(Index size)
 {
-  return Constant(newSize, Scalar(1));
+  return Constant(size, Scalar(1));
 }

 /** \returns an expression of a fixed-size matrix or vector where all coefficients equal one.
@@ -614,7 +609,7 @@ DenseBase<Derived>::Ones()
  */
 template<typename Derived>
 bool DenseBase<Derived>::isOnes
-(const RealScalar& prec) const
+(RealScalar prec) const
 {
  return isApproxToConstant(Scalar(1), prec);
 }
@@ -632,7 +627,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
  return setConstant(Scalar(1));
 }

-/** Resizes to the given \a newSize, and sets all coefficients in this expression to one.
+/** Resizes to the given \a size, and sets all coefficients in this expression to one.
  *
  * \only_for_vectors
  *
@@ -643,9 +638,9 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setOnes(Index newSize)
+PlainObjectBase<Derived>::setOnes(Index size)
 {
-  resize(newSize);
+  resize(size);
  return setConstant(Scalar(1));
 }

@@ -719,21 +714,20 @@ MatrixBase<Derived>::Identity()
  */
 template<typename Derived>
 bool MatrixBase<Derived>::isIdentity
-(const RealScalar& prec) const
+(RealScalar prec) const
 {
-  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index j = 0; j < cols(); ++j)
  {
    for(Index i = 0; i < rows(); ++i)
    {
      if(i == j)
      {
-        if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
+        if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
          return false;
      }
      else
      {
-        if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
+        if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
          return false;
      }
    }
@@ -746,7 +740,6 @@ namespace internal {
 template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
 struct setIdentity_impl
 {
-  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    return m = Derived::Identity(m.rows(), m.cols());
@@ -756,7 +749,7 @@ struct setIdentity_impl
 template<typename Derived>
 struct setIdentity_impl<Derived, true>
 {
-  EIGEN_DEVICE_FUNC
+  typedef typename Derived::Index Index;
  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    m.setZero();
@@ -805,10 +798,10 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index
  * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index size, Index i)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
+  return BasisReturnType(SquareMatrixType::Identity(size,size), i);
 }

 /** \returns an expression of the i-th unit (basis) vector.
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -44,7 +44,10 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
  typedef typename XprType::Nested XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  enum {
-    Flags = _XprTypeNested::Flags & RowMajorBit 
+    Flags = _XprTypeNested::Flags & (
+      HereditaryBits | LinearAccessBit | AlignedBit
+      | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
+    CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
  };
 };
 }
@@ -53,34 +56,28 @@ template<typename UnaryOp, typename XprType, typename StorageKind>
 class CwiseUnaryOpImpl;

 template<typename UnaryOp, typename XprType>
-class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
+class CwiseUnaryOp : internal::no_assignment_operator,
+  public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>
 {
  public:

    typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
-    typedef typename internal::remove_all<XprType>::type NestedExpression;

-    EIGEN_DEVICE_FUNC
-    explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+    inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
      : m_xpr(xpr), m_functor(func) {}

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }

    /** \returns the functor representing the unary operation */
-    EIGEN_DEVICE_FUNC
    const UnaryOp& functor() const { return m_functor; }

    /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    nestedExpression() const { return m_xpr; }

    /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC
    typename internal::remove_all<typename XprType::Nested>::type&
    nestedExpression() { return m_xpr.const_cast_derived(); }

@@ -89,13 +86,39 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
    const UnaryOp m_functor;
 };

-// Generic API dispatcher
-template<typename UnaryOp, typename XprType, typename StorageKind>
-class CwiseUnaryOpImpl
-  : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+// This is the generic implementation for dense storage.
+// It can be used for any expression types implementing the dense concept.
+template<typename UnaryOp, typename XprType>
+class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
+  : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
 {
-public:
-  typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
+  public:
+
+    typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
+    typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+    {
+      return derived().functor()(derived().nestedExpression().coeff(row, col));
+    }
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+    {
+      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(row, col));
+    }
+
+    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+    {
+      return derived().functor()(derived().nestedExpression().coeff(index));
+    }
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+    {
+      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
+    }
 };

 } // end namespace Eigen
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -37,8 +37,8 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
  typedef typename MatrixType::Nested MatrixTypeNested;
  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
-    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
+    Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
+    CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
    MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,
    // need to cast the sizeof's from size_t to int explicitly, otherwise:
    // "error: no integral type can represent all of the enumerator values
@@ -56,15 +56,15 @@ template<typename ViewOp, typename MatrixType, typename StorageKind>
 class CwiseUnaryViewImpl;

 template<typename ViewOp, typename MatrixType>
-class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
+class CwiseUnaryView : internal::no_assignment_operator,
+  public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
 {
  public:

    typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
-    typedef typename internal::remove_all<MatrixType>::type NestedExpression;

-    explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
+    inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
      : m_matrix(mat), m_functor(func) {}

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
@@ -84,19 +84,11 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
    nestedExpression() { return m_matrix.const_cast_derived(); }

  protected:
-    typename internal::ref_selector<MatrixType>::type m_matrix;
+    // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
+    typename internal::nested<MatrixType>::type m_matrix;
    ViewOp m_functor;
 };

-// Generic API dispatcher
-template<typename ViewOp, typename XprType, typename StorageKind>
-class CwiseUnaryViewImpl
-  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
-{
-public:
-  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
-};
-
 template<typename ViewOp, typename MatrixType>
 class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
  : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
@@ -107,20 +99,36 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
    typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;

    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
-    
-    EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
-    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }

-    EIGEN_DEVICE_FUNC inline Index innerStride() const
+    inline Index innerStride() const
    {
      return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
    }

-    EIGEN_DEVICE_FUNC inline Index outerStride() const
+    inline Index outerStride() const
    {
      return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
    }
+
+    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
+    {
+      return derived().functor()(derived().nestedExpression().coeff(row, col));
+    }
+
+    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
+    {
+      return derived().functor()(derived().nestedExpression().coeff(index));
+    }
+
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
+    {
+      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col));
+    }
+
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
+    {
+      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index));
+    }
 };

 } // end namespace Eigen
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -13,16 +13,6 @@

 namespace Eigen {

-namespace internal {
-  
-// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.
-// This dummy function simply aims at checking that at compile time.
-static inline void check_DenseIndex_is_signed() {
-  EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); 
-}
-
-} // end namespace internal
-  
 /** \class DenseBase
  * \ingroup Core_Module
  *
@@ -49,37 +39,22 @@ template<typename Derived> class DenseBase
  public:
    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
-    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator/;

-
-    /** Inner iterator type to iterate over the coefficients of a row or column.
-      * \sa class InnerIterator
-      */
-    typedef Eigen::InnerIterator<Derived> InnerIterator;
+    class InnerIterator;

    typedef typename internal::traits<Derived>::StorageKind StorageKind;

-    /**
-      * \brief The type used to store indices
-      * \details This typedef is relevant for types that store multiple indices such as
-      *          PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
-      * \sa \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
-     */
-    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+    /** \brief The type of indices 
+      * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+      * \sa \ref TopicPreprocessorDirectives.
+      */
+    typedef typename internal::traits<Derived>::Index Index; 

-    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */
    typedef typename internal::traits<Derived>::Scalar Scalar;
-    
-    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.
-      *
-      * It is an alias for the Scalar type */
-    typedef Scalar value_type;
-    
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

-    typedef internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
-                      typename NumTraits<typename internal::traits<Derived>::Scalar>::Real> Base;
+    typedef DenseCoeffsBase<Derived> Base;
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@@ -89,6 +64,16 @@ template<typename Derived> class DenseBase
    using Base::colIndexByOuterInner;
    using Base::coeff;
    using Base::coeffByOuterInner;
+    using Base::packet;
+    using Base::packetByOuterInner;
+    using Base::writePacket;
+    using Base::writePacketByOuterInner;
+    using Base::coeffRef;
+    using Base::coeffRefByOuterInner;
+    using Base::copyCoeff;
+    using Base::copyCoeffByOuterInner;
+    using Base::copyPacket;
+    using Base::copyPacketByOuterInner;
    using Base::operator();
    using Base::operator[];
    using Base::x;
@@ -174,46 +159,19 @@ template<typename Derived> class DenseBase
      InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
                             : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),

+      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
+        /**< This is a rough measure of how expensive it is to read one coefficient from
+          * this expression.
+          */
+
      InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
      OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
    };
-    
-    typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;

-    enum { IsPlainObjectBase = 0 };
-    
-    /** The plain matrix type corresponding to this expression.
-      * \sa PlainObject */
-    typedef Matrix<typename internal::traits<Derived>::Scalar,
-                internal::traits<Derived>::RowsAtCompileTime,
-                internal::traits<Derived>::ColsAtCompileTime,
-                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
-                internal::traits<Derived>::MaxRowsAtCompileTime,
-                internal::traits<Derived>::MaxColsAtCompileTime
-          > PlainMatrix;
-    
-    /** The plain array type corresponding to this expression.
-      * \sa PlainObject */
-    typedef Array<typename internal::traits<Derived>::Scalar,
-                internal::traits<Derived>::RowsAtCompileTime,
-                internal::traits<Derived>::ColsAtCompileTime,
-                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
-                internal::traits<Derived>::MaxRowsAtCompileTime,
-                internal::traits<Derived>::MaxColsAtCompileTime
-          > PlainArray;
-
-    /** \brief The plain matrix or array type corresponding to this expression.
-      *
-      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
-      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
-      * that the return type of eval() is either PlainObject or const PlainObject&.
-      */
-    typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
-                                 PlainMatrix, PlainArray>::type PlainObject;
+    enum { ThisConstantIsPrivateInPlainObjectBase };

    /** \returns the number of nonzero coefficients which is in practice the number
      * of stored coefficients. */
-    EIGEN_DEVICE_FUNC
    inline Index nonZeros() const { return size(); }
    /** \returns true if either the number of rows or the number of columns is equal to 1.
      * In other words, this function returns
@@ -225,7 +183,6 @@ template<typename Derived> class DenseBase
      * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
      * column-major matrix, and the number of rows for a row-major matrix. */
-    EIGEN_DEVICE_FUNC
    Index outerSize() const
    {
      return IsVectorAtCompileTime ? 1
@@ -237,7 +194,6 @@ template<typename Derived> class DenseBase
      * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a 
      * column-major matrix, and the number of columns for a row-major matrix. */
-    EIGEN_DEVICE_FUNC
    Index innerSize() const
    {
      return IsVectorAtCompileTime ? this->size()
@@ -248,18 +204,16 @@ template<typename Derived> class DenseBase
      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
      * nothing else.
      */
-    EIGEN_DEVICE_FUNC
-    void resize(Index newSize)
+    void resize(Index size)
    {
-      EIGEN_ONLY_USED_FOR_DEBUG(newSize);
-      eigen_assert(newSize == this->size()
+      EIGEN_ONLY_USED_FOR_DEBUG(size);
+      eigen_assert(size == this->size()
                && "DenseBase::resize() does not actually allow to resize.");
    }
    /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
      * nothing else.
      */
-    EIGEN_DEVICE_FUNC
    void resize(Index rows, Index cols)
    {
      EIGEN_ONLY_USED_FOR_DEBUG(rows);
@@ -269,12 +223,13 @@ template<typename Derived> class DenseBase
    }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
+
    /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
    /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
    /** \internal Represents a vector with linearly spaced coefficients that allows random access. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
    /** \internal the return type of MatrixBase::eigenvalues() */
    typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;

@@ -282,122 +237,130 @@ template<typename Derived> class DenseBase

    /** Copies \a other into *this. \returns a reference to *this. */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const DenseBase<OtherDerived>& other);

    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const DenseBase& other);

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const EigenBase<OtherDerived> &other);

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator+=(const EigenBase<OtherDerived> &other);

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator-=(const EigenBase<OtherDerived> &other);

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const ReturnByValue<OtherDerived>& func);

-    /** \ínternal
-      * Copies \a other into *this without evaluating other. \returns a reference to *this.
-      * \deprecated */
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& lazyAssign(const DenseBase<OtherDerived>& other);
+#endif // not EIGEN_PARSED_BY_DOXYGEN

-    EIGEN_DEVICE_FUNC
    CommaInitializer<Derived> operator<< (const Scalar& s);

-    /** \deprecated it now returns \c *this */
    template<unsigned int Added,unsigned int Removed>
-    EIGEN_DEPRECATED
-    const Derived& flagged() const
-    { return derived(); }
+    const Flagged<Derived, Added, Removed> flagged() const;

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);

-    typedef Transpose<Derived> TransposeReturnType;
-    EIGEN_DEVICE_FUNC
-    TransposeReturnType transpose();
-    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
-    EIGEN_DEVICE_FUNC
+    Eigen::Transpose<Derived> transpose();
+    typedef const Transpose<const Derived> ConstTransposeReturnType;
    ConstTransposeReturnType transpose() const;
-    EIGEN_DEVICE_FUNC
    void transposeInPlace();
+#ifndef EIGEN_NO_DEBUG
+  protected:
+    template<typename OtherDerived>
+    void checkTransposeAliasing(const OtherDerived& other) const;
+  public:
+#endif

-    EIGEN_DEVICE_FUNC static const ConstantReturnType
+    typedef VectorBlock<Derived> SegmentReturnType;
+    typedef const VectorBlock<const Derived> ConstSegmentReturnType;
+    template<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };
+    template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };
+    
+    // Note: The "DenseBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
+    SegmentReturnType segment(Index start, Index size);
+    typename DenseBase::ConstSegmentReturnType segment(Index start, Index size) const;
+
+    SegmentReturnType head(Index size);
+    typename DenseBase::ConstSegmentReturnType head(Index size) const;
+
+    SegmentReturnType tail(Index size);
+    typename DenseBase::ConstSegmentReturnType tail(Index size) const;
+
+    template<int Size> typename FixedSegmentReturnType<Size>::Type head();
+    template<int Size> typename ConstFixedSegmentReturnType<Size>::Type head() const;
+
+    template<int Size> typename FixedSegmentReturnType<Size>::Type tail();
+    template<int Size> typename ConstFixedSegmentReturnType<Size>::Type tail() const;
+
+    template<int Size> typename FixedSegmentReturnType<Size>::Type segment(Index start);
+    template<int Size> typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) const;
+
+    static const ConstantReturnType
    Constant(Index rows, Index cols, const Scalar& value);
-    EIGEN_DEVICE_FUNC static const ConstantReturnType
+    static const ConstantReturnType
    Constant(Index size, const Scalar& value);
-    EIGEN_DEVICE_FUNC static const ConstantReturnType
+    static const ConstantReturnType
    Constant(const Scalar& value);

-    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+    static const SequentialLinSpacedReturnType
    LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
-    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+    static const RandomAccessLinSpacedReturnType
    LinSpaced(Index size, const Scalar& low, const Scalar& high);
-    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+    static const SequentialLinSpacedReturnType
    LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
-    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+    static const RandomAccessLinSpacedReturnType
    LinSpaced(const Scalar& low, const Scalar& high);

-    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+    template<typename CustomNullaryOp>
+    static const CwiseNullaryOp<CustomNullaryOp, Derived>
    NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+    template<typename CustomNullaryOp>
+    static const CwiseNullaryOp<CustomNullaryOp, Derived>
    NullaryExpr(Index size, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+    template<typename CustomNullaryOp>
+    static const CwiseNullaryOp<CustomNullaryOp, Derived>
    NullaryExpr(const CustomNullaryOp& func);

-    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
-    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
-    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
-    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
-    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
-    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
+    static const ConstantReturnType Zero(Index rows, Index cols);
+    static const ConstantReturnType Zero(Index size);
+    static const ConstantReturnType Zero();
+    static const ConstantReturnType Ones(Index rows, Index cols);
+    static const ConstantReturnType Ones(Index size);
+    static const ConstantReturnType Ones();

-    EIGEN_DEVICE_FUNC void fill(const Scalar& value);
-    EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
-    EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
-    EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
-    EIGEN_DEVICE_FUNC Derived& setZero();
-    EIGEN_DEVICE_FUNC Derived& setOnes();
-    EIGEN_DEVICE_FUNC Derived& setRandom();
+    void fill(const Scalar& value);
+    Derived& setConstant(const Scalar& value);
+    Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+    Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+    Derived& setZero();
+    Derived& setOnes();
+    Derived& setRandom();

-    template<typename OtherDerived> EIGEN_DEVICE_FUNC
+    template<typename OtherDerived>
    bool isApprox(const DenseBase<OtherDerived>& other,
-                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    EIGEN_DEVICE_FUNC 
+                  RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
    bool isMuchSmallerThan(const RealScalar& other,
-                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC
+                           RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
+    template<typename OtherDerived>
    bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
-                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+                           RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;

-    EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    
-    inline bool hasNaN() const;
-    inline bool allFinite() const;
+    bool isApproxToConstant(const Scalar& value, RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isConstant(const Scalar& value, RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isZero(RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isOnes(RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;

-    EIGEN_DEVICE_FUNC
    inline Derived& operator*=(const Scalar& other);
-    EIGEN_DEVICE_FUNC
    inline Derived& operator/=(const Scalar& other);

    typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
@@ -405,10 +368,7 @@ template<typename Derived> class DenseBase
      *
      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
      * a const reference, in order to avoid a useless copy.
-      * 
-      * \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE EvalReturnType eval() const
    {
      // Even though MSVC does not honor strong inlining when the return type
@@ -416,68 +376,61 @@ template<typename Derived> class DenseBase
      // size types on MSVC.
      return typename internal::eval<Derived>::type(derived());
    }
-    
+
    /** swaps *this with the expression \a other.
      *
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    void swap(const DenseBase<OtherDerived>& other)
+    void swap(const DenseBase<OtherDerived>& other,
+              int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
    {
-      EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
-      eigen_assert(rows()==other.rows() && cols()==other.cols());
-      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
+      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
    }

    /** swaps *this with the matrix or array \a other.
      *
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    void swap(PlainObjectBase<OtherDerived>& other)
    {
-      eigen_assert(rows()==other.rows() && cols()==other.cols());
-      call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
+      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
    }

-    EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
-    EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
-    EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
-    template<bool Enable> EIGEN_DEVICE_FUNC
-    inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
-    template<bool Enable> EIGEN_DEVICE_FUNC
-    inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();

-    EIGEN_DEVICE_FUNC Scalar sum() const;
-    EIGEN_DEVICE_FUNC Scalar mean() const;
-    EIGEN_DEVICE_FUNC Scalar trace() const;
+    inline const NestByValue<Derived> nestByValue() const;
+    inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+    inline ForceAlignedAccess<Derived> forceAlignedAccess();
+    template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+    template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();

-    EIGEN_DEVICE_FUNC Scalar prod() const;
+    Scalar sum() const;
+    Scalar mean() const;
+    Scalar trace() const;

-    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
-    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+    Scalar prod() const;

-    template<typename IndexType> EIGEN_DEVICE_FUNC
+    typename internal::traits<Derived>::Scalar minCoeff() const;
+    typename internal::traits<Derived>::Scalar maxCoeff() const;
+
+    template<typename IndexType>
    typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType> EIGEN_DEVICE_FUNC
+    template<typename IndexType>
    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType> EIGEN_DEVICE_FUNC
+    template<typename IndexType>
    typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
-    template<typename IndexType> EIGEN_DEVICE_FUNC
+    template<typename IndexType>
    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;

    template<typename BinaryOp>
-    EIGEN_DEVICE_FUNC
-    Scalar redux(const BinaryOp& func) const;
+    typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
+    redux(const BinaryOp& func) const;

    template<typename Visitor>
-    EIGEN_DEVICE_FUNC
    void visit(Visitor& func) const;

    inline const WithFormat<Derived> format(const IOFormat& fmt) const;

    /** \returns the unique coefficient of a 1x1 expression */
-    EIGEN_DEVICE_FUNC
    CoeffReturnType value() const
    {
      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
@@ -485,8 +438,10 @@ template<typename Derived> class DenseBase
      return derived().coeff(0,0);
    }

-    bool all() const;
-    bool any() const;
+/////////// Array module ///////////
+
+    bool all(void) const;
+    bool any(void) const;
    Index count() const;

    typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
@@ -494,35 +449,14 @@ template<typename Derived> class DenseBase
    typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
    typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;

-    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
-    *
-    * Example: \include MatrixBase_rowwise.cpp
-    * Output: \verbinclude MatrixBase_rowwise.out
-    *
-    * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
-    */
-    //Code moved here due to a CUDA compiler bug
-    EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
-      return ConstRowwiseReturnType(derived());
-    }
-    EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
+    ConstRowwiseReturnType rowwise() const;
+    RowwiseReturnType rowwise();
+    ConstColwiseReturnType colwise() const;
+    ColwiseReturnType colwise();

-    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
-    *
-    * Example: \include MatrixBase_colwise.cpp
-    * Output: \verbinclude MatrixBase_colwise.out
-    *
-    * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
-    */
-    EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
-      return ConstColwiseReturnType(derived());
-    }
-    EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
-
-    typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
-    static const RandomReturnType Random(Index rows, Index cols);
-    static const RandomReturnType Random(Index size);
-    static const RandomReturnType Random();
+    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
+    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
+    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();

    template<typename ThenDerived,typename ElseDerived>
    const Select<Derived,ThenDerived,ElseDerived>
@@ -531,42 +465,23 @@ template<typename Derived> class DenseBase

    template<typename ThenDerived>
    inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
-    select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
+    select(const DenseBase<ThenDerived>& thenMatrix, typename ThenDerived::Scalar elseScalar) const;

    template<typename ElseDerived>
    inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
-    select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
+    select(typename ElseDerived::Scalar thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;

    template<int p> RealScalar lpNorm() const;

    template<int RowFactor, int ColFactor>
-    EIGEN_DEVICE_FUNC
    const Replicate<Derived,RowFactor,ColFactor> replicate() const;
-    /**
-    * \return an expression of the replication of \c *this
-    *
-    * Example: \include MatrixBase_replicate_int_int.cpp
-    * Output: \verbinclude MatrixBase_replicate_int_int.out
-    *
-    * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
-    */
-    //Code moved here due to a CUDA compiler bug
-    EIGEN_DEVICE_FUNC
-    const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
-    {
-      return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
-    }
+    const Replicate<Derived,Dynamic,Dynamic> replicate(Index rowFacor,Index colFactor) const;

    typedef Reverse<Derived, BothDirections> ReverseReturnType;
    typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
-    EIGEN_DEVICE_FUNC ReverseReturnType reverse();
-    /** This is the const version of reverse(). */
-    //Code moved here due to a CUDA compiler bug
-    EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
-    {
-      return ConstReverseReturnType(derived());
-    }
-    EIGEN_DEVICE_FUNC void reverseInPlace();
+    ReverseReturnType reverse();
+    ConstReverseReturnType reverse() const;
+    void reverseInPlace();

 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
 #   include "../plugins/BlockMethods.h"
@@ -575,18 +490,27 @@ template<typename Derived> class DenseBase
 #   endif
 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS

+#ifdef EIGEN2_SUPPORT
+
+    Block<Derived> corner(CornerType type, Index cRows, Index cCols);
+    const Block<Derived> corner(CornerType type, Index cRows, Index cCols) const;
+    template<int CRows, int CCols>
+    Block<Derived, CRows, CCols> corner(CornerType type);
+    template<int CRows, int CCols>
+    const Block<Derived, CRows, CCols> corner(CornerType type) const;
+
+#endif // EIGEN2_SUPPORT
+

    // disable the use of evalTo for dense objects with a nice compilation error
-    template<typename Dest>
-    EIGEN_DEVICE_FUNC
-    inline void evalTo(Dest& ) const
+    template<typename Dest> inline void evalTo(Dest& ) const
    {
      EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
    }

  protected:
    /** Default constructor. Do nothing. */
-    EIGEN_DEVICE_FUNC DenseBase()
+    DenseBase()
    {
      /* Just checks for self-consistency of the flags.
       * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
@@ -599,9 +523,9 @@ template<typename Derived> class DenseBase
    }

  private:
-    EIGEN_DEVICE_FUNC explicit DenseBase(int);
-    EIGEN_DEVICE_FUNC DenseBase(int,int);
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
+    explicit DenseBase(int);
+    DenseBase(int,int);
+    template<typename OtherDerived> explicit DenseBase(const DenseBase<OtherDerived>&);
 };

 } // end namespace Eigen
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -35,6 +35,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
 {
  public:
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;

@@ -60,7 +61,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    using Base::size;
    using Base::derived;

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
    {
      return int(Derived::RowsAtCompileTime) == 1 ? 0
@@ -69,7 +69,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
          : inner;
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
    {
      return int(Derived::ColsAtCompileTime) == 1 ? 0
@@ -92,15 +91,13 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      *
      * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
    {
      eigen_internal_assert(row >= 0 && row < rows()
-                         && col >= 0 && col < cols());
-      return internal::evaluator<Derived>(derived()).coeff(row,col);
+                        && col >= 0 && col < cols());
+      return derived().coeff(row, col);
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
    {
      return coeff(rowIndexByOuterInner(outer, inner),
@@ -111,12 +108,11 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      *
      * \sa operator()(Index,Index), operator[](Index)
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
    {
      eigen_assert(row >= 0 && row < rows()
          && col >= 0 && col < cols());
-      return coeff(row, col);
+      return derived().coeff(row, col);
    }

    /** Short version: don't use this function, use
@@ -134,12 +130,11 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
      */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    coeff(Index index) const
    {
      eigen_internal_assert(index >= 0 && index < size());
-      return internal::evaluator<Derived>(derived()).coeff(index);
+      return derived().coeff(index);
    }


@@ -151,14 +146,15 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * z() const, w() const
      */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    operator[](Index index) const
    {
+      #ifndef EIGEN2_SUPPORT
      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+      #endif
      eigen_assert(index >= 0 && index < size());
-      return coeff(index);
+      return derived().coeff(index);
    }

    /** \returns the coefficient at given index.
@@ -171,35 +167,30 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * z() const, w() const
      */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    operator()(Index index) const
    {
      eigen_assert(index >= 0 && index < size());
-      return coeff(index);
+      return derived().coeff(index);
    }

    /** equivalent to operator[](0).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    x() const { return (*this)[0]; }

    /** equivalent to operator[](1).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    y() const { return (*this)[1]; }

    /** equivalent to operator[](2).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    z() const { return (*this)[2]; }

    /** equivalent to operator[](3).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    w() const { return (*this)[3]; }

@@ -216,9 +207,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
    {
-      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
-      eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
-      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
+      eigen_internal_assert(row >= 0 && row < rows()
+                      && col >= 0 && col < cols());
+      return derived().template packet<LoadMode>(row,col);
    }


@@ -243,9 +234,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    {
-      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
      eigen_internal_assert(index >= 0 && index < size());
-      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
+      return derived().template packet<LoadMode>(index);
    }

  protected:
@@ -288,6 +278,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;

    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -320,15 +311,13 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      *
      * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
    {
      eigen_internal_assert(row >= 0 && row < rows()
-                         && col >= 0 && col < cols());
-      return internal::evaluator<Derived>(derived()).coeffRef(row,col);
+                        && col >= 0 && col < cols());
+      return derived().coeffRef(row, col);
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    coeffRefByOuterInner(Index outer, Index inner)
    {
@@ -341,13 +330,12 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index)
      */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator()(Index row, Index col)
    {
      eigen_assert(row >= 0 && row < rows()
          && col >= 0 && col < cols());
-      return coeffRef(row, col);
+      return derived().coeffRef(row, col);
    }


@@ -366,12 +354,11 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
      */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    coeffRef(Index index)
    {
      eigen_internal_assert(index >= 0 && index < size());
-      return internal::evaluator<Derived>(derived()).coeffRef(index);
+      return derived().coeffRef(index);
    }

    /** \returns a reference to the coefficient at given index.
@@ -381,14 +368,15 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
      */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator[](Index index)
    {
+      #ifndef EIGEN2_SUPPORT
      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+      #endif
      eigen_assert(index >= 0 && index < size());
-      return coeffRef(index);
+      return derived().coeffRef(index);
    }

    /** \returns a reference to the coefficient at given index.
@@ -400,37 +388,167 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
      */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator()(Index index)
    {
      eigen_assert(index >= 0 && index < size());
-      return coeffRef(index);
+      return derived().coeffRef(index);
    }

    /** equivalent to operator[](0).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    x() { return (*this)[0]; }

    /** equivalent to operator[](1).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    y() { return (*this)[1]; }

    /** equivalent to operator[](2).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    z() { return (*this)[2]; }

    /** equivalent to operator[](3).  */

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    w() { return (*this)[3]; }
+
+    /** \internal
+      * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
+      * to ensure that a packet really starts there. This method is only available on expressions having the
+      * PacketAccessBit.
+      *
+      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+      * starting at an address which is a multiple of the packet size.
+      */
+
+    template<int StoreMode>
+    EIGEN_STRONG_INLINE void writePacket
+    (Index row, Index col, const typename internal::packet_traits<Scalar>::type& x)
+    {
+      eigen_internal_assert(row >= 0 && row < rows()
+                        && col >= 0 && col < cols());
+      derived().template writePacket<StoreMode>(row,col,x);
+    }
+
+
+    /** \internal */
+    template<int StoreMode>
+    EIGEN_STRONG_INLINE void writePacketByOuterInner
+    (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& x)
+    {
+      writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
+                            colIndexByOuterInner(outer, inner),
+                            x);
+    }
+
+    /** \internal
+      * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
+      * to ensure that a packet really starts there. This method is only available on expressions having the
+      * PacketAccessBit and the LinearAccessBit.
+      *
+      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
+      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+      * starting at an address which is a multiple of the packet size.
+      */
+    template<int StoreMode>
+    EIGEN_STRONG_INLINE void writePacket
+    (Index index, const typename internal::packet_traits<Scalar>::type& x)
+    {
+      eigen_internal_assert(index >= 0 && index < size());
+      derived().template writePacket<StoreMode>(index,x);
+    }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+    /** \internal Copies the coefficient at position (row,col) of other into *this.
+      *
+      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+      * with usual assignments.
+      *
+      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
+      */
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
+    {
+      eigen_internal_assert(row >= 0 && row < rows()
+                        && col >= 0 && col < cols());
+      derived().coeffRef(row, col) = other.derived().coeff(row, col);
+    }
+
+    /** \internal Copies the coefficient at the given index of other into *this.
+      *
+      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+      * with usual assignments.
+      *
+      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
+      */
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
+    {
+      eigen_internal_assert(index >= 0 && index < size());
+      derived().coeffRef(index) = other.derived().coeff(index);
+    }
+
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
+    {
+      const Index row = rowIndexByOuterInner(outer,inner);
+      const Index col = colIndexByOuterInner(outer,inner);
+      // derived() is important here: copyCoeff() may be reimplemented in Derived!
+      derived().copyCoeff(row, col, other);
+    }
+
+    /** \internal Copies the packet at position (row,col) of other into *this.
+      *
+      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+      * with usual assignments.
+      *
+      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
+      */
+
+    template<typename OtherDerived, int StoreMode, int LoadMode>
+    EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
+    {
+      eigen_internal_assert(row >= 0 && row < rows()
+                        && col >= 0 && col < cols());
+      derived().template writePacket<StoreMode>(row, col,
+        other.derived().template packet<LoadMode>(row, col));
+    }
+
+    /** \internal Copies the packet at the given index of other into *this.
+      *
+      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+      * with usual assignments.
+      *
+      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
+      */
+
+    template<typename OtherDerived, int StoreMode, int LoadMode>
+    EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
+    {
+      eigen_internal_assert(index >= 0 && index < size());
+      derived().template writePacket<StoreMode>(index,
+        other.derived().template packet<LoadMode>(index));
+    }
+
+    /** \internal */
+    template<typename OtherDerived, int StoreMode, int LoadMode>
+    EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
+    {
+      const Index row = rowIndexByOuterInner(outer,inner);
+      const Index col = colIndexByOuterInner(outer,inner);
+      // derived() is important here: copyCoeff() may be reimplemented in Derived!
+      derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other);
+    }
+#endif
+
 };

 /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
@@ -450,6 +568,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
  public:

    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

@@ -462,7 +581,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa outerStride(), rowStride(), colStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return derived().innerStride();
@@ -473,7 +591,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), rowStride(), colStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return derived().outerStride();
@@ -489,7 +606,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), outerStride(), colStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index rowStride() const
    {
      return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -499,7 +615,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index colStride() const
    {
      return Derived::IsRowMajor ? innerStride() : outerStride();
@@ -524,6 +639,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
  public:

    typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

@@ -536,7 +652,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa outerStride(), rowStride(), colStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return derived().innerStride();
@@ -547,7 +662,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), rowStride(), colStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return derived().outerStride();
@@ -563,7 +677,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), outerStride(), colStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index rowStride() const
    {
      return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -573,7 +686,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
-    EIGEN_DEVICE_FUNC
    inline Index colStride() const
    {
      return Derived::IsRowMajor ? innerStride() : outerStride();
@@ -582,42 +694,33 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>

 namespace internal {

-template<int Alignment, typename Derived, bool JustReturnZero>
+template<typename Derived, bool JustReturnZero>
 struct first_aligned_impl
 {
-  static inline Index run(const Derived&)
+  static inline typename Derived::Index run(const Derived&)
  { return 0; }
 };

-template<int Alignment, typename Derived>
-struct first_aligned_impl<Alignment, Derived, false>
+template<typename Derived>
+struct first_aligned_impl<Derived, false>
 {
-  static inline Index run(const Derived& m)
+  static inline typename Derived::Index run(const Derived& m)
  {
-    return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
+    return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
  }
 };

-/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization.
-  *
-  * \tparam Alignment requested alignment in Bytes.
+/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
  *
  * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
  * documentation.
  */
-template<int Alignment, typename Derived>
-static inline Index first_aligned(const DenseBase<Derived>& m)
-{
-  enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
-  return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
-}
-
 template<typename Derived>
-static inline Index first_default_aligned(const DenseBase<Derived>& m)
+static inline typename Derived::Index first_aligned(const Derived& m)
 {
-  typedef typename Derived::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type DefaultPacketType;
-  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(m);
+  return first_aligned_impl
+          <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
+          ::run(m);
 }

 template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -3,7 +3,7 @@
 //
 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
+// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -24,38 +24,19 @@ namespace internal {

 struct constructor_without_unaligned_array_assert {};

-template<typename T, int Size>
-EIGEN_DEVICE_FUNC
-void check_static_allocation_size()
-{
-  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
-  #if EIGEN_STACK_ALLOCATION_LIMIT
-  EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
-  #endif
-}
-
 /** \internal
  * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
  * to 16 bytes boundary if the total size is a multiple of 16 bytes.
  */
 template <typename T, int Size, int MatrixOrArrayOptions,
          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
-                        : compute_default_alignment<T,Size>::value >
+                        : (((Size*sizeof(T))%16)==0) ? 16
+                        : 0 >
 struct plain_array
 {
  T array[Size];
-
-  EIGEN_DEVICE_FUNC
-  plain_array()
-  { 
-    check_static_allocation_size<T,Size>();
-  }
-
-  EIGEN_DEVICE_FUNC
-  plain_array(constructor_without_unaligned_array_assert)
-  { 
-    check_static_allocation_size<T,Size>();
-  }
+  plain_array() {}
+  plain_array(constructor_without_unaligned_array_assert) {}
 };

 #if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
@@ -67,100 +48,32 @@ struct plain_array
  template<typename PtrType>
  EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-    eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
+    eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \
              && "this assertion is explained here: " \
              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #else
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-    eigen_assert((reinterpret_cast<size_t>(array) & (sizemask)) == 0 \
+    eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
              && "this assertion is explained here: " \
              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #endif

-template <typename T, int Size, int MatrixOrArrayOptions>
-struct plain_array<T, Size, MatrixOrArrayOptions, 8>
-{
-  EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
-
-  EIGEN_DEVICE_FUNC
-  plain_array() 
-  {
-    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
-    check_static_allocation_size<T,Size>();
-  }
-
-  EIGEN_DEVICE_FUNC
-  plain_array(constructor_without_unaligned_array_assert) 
-  { 
-    check_static_allocation_size<T,Size>();
-  }
-};
-
 template <typename T, int Size, int MatrixOrArrayOptions>
 struct plain_array<T, Size, MatrixOrArrayOptions, 16>
 {
-  EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
-
-  EIGEN_DEVICE_FUNC
-  plain_array() 
-  { 
-    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
-    check_static_allocation_size<T,Size>();
-  }
-
-  EIGEN_DEVICE_FUNC
-  plain_array(constructor_without_unaligned_array_assert) 
-  { 
-    check_static_allocation_size<T,Size>();
-  }
-};
-
-template <typename T, int Size, int MatrixOrArrayOptions>
-struct plain_array<T, Size, MatrixOrArrayOptions, 32>
-{
-  EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
-
-  EIGEN_DEVICE_FUNC
-  plain_array() 
-  {
-    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
-    check_static_allocation_size<T,Size>();
-  }
-
-  EIGEN_DEVICE_FUNC
-  plain_array(constructor_without_unaligned_array_assert) 
-  { 
-    check_static_allocation_size<T,Size>();
-  }
-};
-
-template <typename T, int Size, int MatrixOrArrayOptions>
-struct plain_array<T, Size, MatrixOrArrayOptions, 64>
-{
-  EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
-
-  EIGEN_DEVICE_FUNC
-  plain_array() 
-  { 
-    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
-    check_static_allocation_size<T,Size>();
-  }
-
-  EIGEN_DEVICE_FUNC
-  plain_array(constructor_without_unaligned_array_assert) 
-  { 
-    check_static_allocation_size<T,Size>();
-  }
+  EIGEN_USER_ALIGN16 T array[Size];
+  plain_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf) }
+  plain_array(constructor_without_unaligned_array_assert) {}
 };

 template <typename T, int MatrixOrArrayOptions, int Alignment>
 struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
 {
-  T array[1];
-  EIGEN_DEVICE_FUNC plain_array() {}
-  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+  EIGEN_USER_ALIGN16 T array[1];
+  plain_array() {}
+  plain_array(constructor_without_unaligned_array_assert) {}
 };

 } // end namespace internal
@@ -184,50 +97,33 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
 {
    internal::plain_array<T,Size,_Options> m_data;
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() {}
-    EIGEN_DEVICE_FUNC
-    explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+    inline explicit DenseStorage() {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()) {}
-    EIGEN_DEVICE_FUNC 
-    DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
-    EIGEN_DEVICE_FUNC 
-    DenseStorage& operator=(const DenseStorage& other)
-    { 
-      if (this != &other) m_data = other.m_data;
-      return *this; 
-    }
-    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
-      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
-      eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
-      EIGEN_UNUSED_VARIABLE(size);
-      EIGEN_UNUSED_VARIABLE(rows);
-      EIGEN_UNUSED_VARIABLE(cols);
-    }
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
-    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
-    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
-    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
-    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+    static inline DenseIndex rows(void) {return _Rows;}
+    static inline DenseIndex cols(void) {return _Cols;}
+    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
+    inline const T *data() const { return m_data.array; }
+    inline T *data() { return m_data.array; }
 };

 // null matrix
 template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
 {
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() {}
-    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
-    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
-    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
-    EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
-    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
-    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
-    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
-    EIGEN_DEVICE_FUNC const T *data() const { return 0; }
-    EIGEN_DEVICE_FUNC T *data() { return 0; }
+    inline explicit DenseStorage() {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+    inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+    inline void swap(DenseStorage& ) {}
+    static inline DenseIndex rows(void) {return _Rows;}
+    static inline DenseIndex cols(void) {return _Cols;}
+    inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+    inline void resize(DenseIndex,DenseIndex,DenseIndex) {}
+    inline const T *data() const { return 0; }
+    inline T *data() { return 0; }
 };

 // more specializations for null matrices; these are necessary to resolve ambiguities
@@ -244,157 +140,86 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic,
 template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    Index m_rows;
-    Index m_cols;
+    DenseIndex m_rows;
+    DenseIndex m_cols;
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
-    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+    inline explicit DenseStorage() : m_rows(0), m_cols(0) {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
-    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
-    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
-    { 
-      if (this != &other)
-      {
-        m_data = other.m_data;
-        m_rows = other.m_rows;
-        m_cols = other.m_cols;
-      }
-      return *this; 
-    }
-    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+    inline DenseStorage(DenseIndex, DenseIndex rows, DenseIndex cols) : m_rows(rows), m_cols(cols) {}
+    inline void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
-    EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
-    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
-    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
-    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
-    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+    inline DenseIndex rows(void) const {return m_rows;}
+    inline DenseIndex cols(void) const {return m_cols;}
+    inline void conservativeResize(DenseIndex, DenseIndex rows, DenseIndex cols) { m_rows = rows; m_cols = cols; }
+    inline void resize(DenseIndex, DenseIndex rows, DenseIndex cols) { m_rows = rows; m_cols = cols; }
+    inline const T *data() const { return m_data.array; }
+    inline T *data() { return m_data.array; }
 };

 // dynamic-size matrix with fixed-size storage and fixed width
 template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    Index m_rows;
+    DenseIndex m_rows;
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
-    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+    inline explicit DenseStorage() : m_rows(0) {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
-    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
-    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
-    {
-      if (this != &other)
-      {
-        m_data = other.m_data;
-        m_rows = other.m_rows;
-      }
-      return *this; 
-    }
-    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
-    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
-    EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
-    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
-    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
-    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
-    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+    inline DenseStorage(DenseIndex, DenseIndex rows, DenseIndex) : m_rows(rows) {}
+    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    inline DenseIndex rows(void) const {return m_rows;}
+    inline DenseIndex cols(void) const {return _Cols;}
+    inline void conservativeResize(DenseIndex, DenseIndex rows, DenseIndex) { m_rows = rows; }
+    inline void resize(DenseIndex, DenseIndex rows, DenseIndex) { m_rows = rows; }
+    inline const T *data() const { return m_data.array; }
+    inline T *data() { return m_data.array; }
 };

 // dynamic-size matrix with fixed-size storage and fixed height
 template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    Index m_cols;
+    DenseIndex m_cols;
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
-    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+    inline explicit DenseStorage() : m_cols(0) {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
-    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
-    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
-    {
-      if (this != &other)
-      {
-        m_data = other.m_data;
-        m_cols = other.m_cols;
-      }
-      return *this;
-    }
-    EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
-    EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
-    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
-    void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
-    void resize(Index, Index, Index cols) { m_cols = cols; }
-    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
-    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+    inline DenseStorage(DenseIndex, DenseIndex, DenseIndex cols) : m_cols(cols) {}
+    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    inline DenseIndex rows(void) const {return _Rows;}
+    inline DenseIndex cols(void) const {return m_cols;}
+    inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex cols) { m_cols = cols; }
+    inline void resize(DenseIndex, DenseIndex, DenseIndex cols) { m_cols = cols; }
+    inline const T *data() const { return m_data.array; }
+    inline T *data() { return m_data.array; }
 };

 // purely dynamic matrix.
 template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
 {
    T *m_data;
-    Index m_rows;
-    Index m_cols;
+    DenseIndex m_rows;
+    DenseIndex m_cols;
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
-    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+    inline explicit DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert)
       : m_data(0), m_rows(0), m_cols(0) {}
-    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
-      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
-    {
-      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
-      eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
-    }
-    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
-      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
-      , m_rows(other.m_rows)
-      , m_cols(other.m_cols)
-    {
-      internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
-    }
-    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
-    {
-      if (this != &other)
-      {
-        DenseStorage tmp(other);
-        this->swap(tmp);
-      }
-      return *this;
-    }
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    EIGEN_DEVICE_FUNC
-    DenseStorage(DenseStorage&& other)
-      : m_data(std::move(other.m_data))
-      , m_rows(std::move(other.m_rows))
-      , m_cols(std::move(other.m_cols))
-    {
-      other.m_data = nullptr;
-      other.m_rows = 0;
-      other.m_cols = 0;
-    }
-    EIGEN_DEVICE_FUNC
-    DenseStorage& operator=(DenseStorage&& other)
-    {
-      using std::swap;
-      swap(m_data, other.m_data);
-      swap(m_rows, other.m_rows);
-      swap(m_cols, other.m_cols);
-      return *this;
-    }
-#endif
-    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+    inline DenseStorage(DenseIndex size, DenseIndex rows, DenseIndex cols)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols) 
+    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+    inline void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
-    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
-    void conservativeResize(Index size, Index rows, Index cols)
+    inline DenseIndex rows(void) const {return m_rows;}
+    inline DenseIndex cols(void) const {return m_cols;}
+    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
      m_rows = rows;
      m_cols = cols;
    }
-    EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
+    void resize(DenseIndex size, DenseIndex rows, DenseIndex cols)
    {
      if(size != m_rows*m_cols)
      {
@@ -408,67 +233,30 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
      m_rows = rows;
      m_cols = cols;
    }
-    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
-    EIGEN_DEVICE_FUNC T *data() { return m_data; }
+    inline const T *data() const { return m_data; }
+    inline T *data() { return m_data; }
 };

 // matrix with dynamic width and fixed height (so that matrix has dynamic size).
 template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
 {
    T *m_data;
-    Index m_cols;
+    DenseIndex m_cols;
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
-    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
-    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
-    {
-      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
-      eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
-      EIGEN_UNUSED_VARIABLE(rows);
-    }
-    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
-      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
-      , m_cols(other.m_cols)
-    {
-      internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
-    }
-    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
-    {
-      if (this != &other)
-      {
-        DenseStorage tmp(other);
-        this->swap(tmp);
-      }
-      return *this;
-    }    
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    EIGEN_DEVICE_FUNC
-    DenseStorage(DenseStorage&& other)
-      : m_data(std::move(other.m_data))
-      , m_cols(std::move(other.m_cols))
-    {
-      other.m_data = nullptr;
-      other.m_cols = 0;
-    }
-    EIGEN_DEVICE_FUNC
-    DenseStorage& operator=(DenseStorage&& other)
-    {
-      using std::swap;
-      swap(m_data, other.m_data);
-      swap(m_cols, other.m_cols);
-      return *this;
-    }
-#endif
-    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
-    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
-    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
-    EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
+    inline explicit DenseStorage() : m_data(0), m_cols(0) {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+    inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
+    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    static inline DenseIndex rows(void) {return _Rows;}
+    inline DenseIndex cols(void) const {return m_cols;}
+    inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
      m_cols = cols;
    }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
+    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex cols)
    {
      if(size != _Rows*m_cols)
      {
@@ -481,67 +269,30 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
      }
      m_cols = cols;
    }
-    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
-    EIGEN_DEVICE_FUNC T *data() { return m_data; }
+    inline const T *data() const { return m_data; }
+    inline T *data() { return m_data; }
 };

 // matrix with dynamic height and fixed width (so that matrix has dynamic size).
 template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
 {
    T *m_data;
-    Index m_rows;
+    DenseIndex m_rows;
  public:
-    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
-    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
-    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
-    {
-      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
-      eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
-      EIGEN_UNUSED_VARIABLE(cols);
-    }
-    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
-      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
-      , m_rows(other.m_rows)
-    {
-      internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
-    }
-    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
-    {
-      if (this != &other)
-      {
-        DenseStorage tmp(other);
-        this->swap(tmp);
-      }
-      return *this;
-    }    
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    EIGEN_DEVICE_FUNC
-    DenseStorage(DenseStorage&& other)
-      : m_data(std::move(other.m_data))
-      , m_rows(std::move(other.m_rows))
-    {
-      other.m_data = nullptr;
-      other.m_rows = 0;
-    }
-    EIGEN_DEVICE_FUNC
-    DenseStorage& operator=(DenseStorage&& other)
-    {
-      using std::swap;
-      swap(m_data, other.m_data);
-      swap(m_rows, other.m_rows);
-      return *this;
-    }
-#endif
-    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
-    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
-    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
-    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
-    void conservativeResize(Index size, Index rows, Index)
+    inline explicit DenseStorage() : m_data(0), m_rows(0) {}
+    inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+    inline DenseStorage(DenseIndex size, DenseIndex rows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
+    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
+    inline ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+    inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    inline DenseIndex rows(void) const {return m_rows;}
+    static inline DenseIndex cols(void) {return _Cols;}
+    inline void conservativeResize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
      m_rows = rows;
    }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
+    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex rows, DenseIndex)
    {
      if(size != m_rows*_Cols)
      {
@@ -554,8 +305,8 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
      }
      m_rows = rows;
    }
-    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
-    EIGEN_DEVICE_FUNC T *data() { return m_data; }
+    inline const T *data() const { return m_data; }
+    inline T *data() { return m_data; }
 };

 } // end namespace Eigen
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -37,22 +37,23 @@ template<typename MatrixType, int DiagIndex>
 struct traits<Diagonal<MatrixType,DiagIndex> >
 : traits<MatrixType>
 {
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename nested<MatrixType>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
  typedef typename MatrixType::StorageKind StorageKind;
  enum {
-    RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
-                      : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
-                                              MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
+    RowsAtCompileTime = (int(DiagIndex) == Dynamic || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
+    : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
+                            MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
    ColsAtCompileTime = 1,
    MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
-                         : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime,
+                         : DiagIndex == Dynamic ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime,
                                                                              MatrixType::MaxColsAtCompileTime)
                         : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
                                                 MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
    MaxColsAtCompileTime = 1,
    MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
+    Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
    MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
    InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
    OuterStrideAtCompileTime = 0
@@ -60,37 +61,28 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
 };
 }

-template<typename MatrixType, int _DiagIndex> class Diagonal
-   : public internal::dense_xpr_base< Diagonal<MatrixType,_DiagIndex> >::type
+template<typename MatrixType, int DiagIndex> class Diagonal
+   : public internal::dense_xpr_base< Diagonal<MatrixType,DiagIndex> >::type
 {
  public:

-    enum { DiagIndex = _DiagIndex };
    typedef typename internal::dense_xpr_base<Diagonal>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)

-    EIGEN_DEVICE_FUNC
-    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+    inline Diagonal(MatrixType& matrix, Index index = DiagIndex) : m_matrix(matrix), m_index(index) {}

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

-    EIGEN_DEVICE_FUNC
    inline Index rows() const
-    {
-      return m_index.value()<0 ? numext::mini<Index>(m_matrix.cols(),m_matrix.rows()+m_index.value())
-                               : numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());
-    }
+    { return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }

-    EIGEN_DEVICE_FUNC
    inline Index cols() const { return 1; }

-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return m_matrix.outerStride() + 1;
    }

-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return 0;
@@ -102,75 +94,62 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

-    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
-    EIGEN_DEVICE_FUNC
    inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }

-    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index row, Index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
      return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
    }

-    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index row, Index) const
    {
      return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
    }

-    EIGEN_DEVICE_FUNC
    inline CoeffReturnType coeff(Index row, Index) const
    {
      return m_matrix.coeff(row+rowOffset(), row+colOffset());
    }

-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index idx)
+    inline Scalar& coeffRef(Index index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+      return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
    }

-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index idx) const
+    inline const Scalar& coeffRef(Index index) const
    {
-      return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+      return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
    }

-    EIGEN_DEVICE_FUNC
-    inline CoeffReturnType coeff(Index idx) const
+    inline CoeffReturnType coeff(Index index) const
    {
-      return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
+      return m_matrix.coeff(index+rowOffset(), index+colOffset());
    }

-    EIGEN_DEVICE_FUNC
-    inline const typename internal::remove_all<typename MatrixType::Nested>::type& 
+    const typename internal::remove_all<typename MatrixType::Nested>::type& 
    nestedExpression() const 
    {
      return m_matrix;
    }

-    EIGEN_DEVICE_FUNC
-    inline Index index() const
+    int index() const
    {
      return m_index.value();
    }

  protected:
    typename MatrixType::Nested m_matrix;
-    const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
+    const internal::variable_if_dynamic<Index, DiagIndex> m_index;

  private:
    // some compilers may fail to optimize std::max etc in case of compile-time constants...
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
-    // trigger a compile-time error if someone try to call packet
+    // triger a compile time error is someone try to call packet
    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
 };
@@ -187,12 +166,12 @@ template<typename Derived>
 inline typename MatrixBase<Derived>::DiagonalReturnType
 MatrixBase<Derived>::diagonal()
 {
-  return DiagonalReturnType(derived());
+  return derived();
 }

 /** This is the const version of diagonal(). */
 template<typename Derived>
-inline typename MatrixBase<Derived>::ConstDiagonalReturnType
+inline const typename MatrixBase<Derived>::ConstDiagonalReturnType
 MatrixBase<Derived>::diagonal() const
 {
  return ConstDiagonalReturnType(derived());
@@ -210,18 +189,18 @@ MatrixBase<Derived>::diagonal() const
  *
  * \sa MatrixBase::diagonal(), class Diagonal */
 template<typename Derived>
-inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Dynamic>::Type
 MatrixBase<Derived>::diagonal(Index index)
 {
-  return DiagonalDynamicIndexReturnType(derived(), index);
+  return typename DiagonalIndexReturnType<Dynamic>::Type(derived(), index);
 }

 /** This is the const version of diagonal(Index). */
 template<typename Derived>
-inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Dynamic>::Type
 MatrixBase<Derived>::diagonal(Index index) const
 {
-  return ConstDiagonalDynamicIndexReturnType(derived(), index);
+  return typename ConstDiagonalIndexReturnType<Dynamic>::Type(derived(), index);
 }

 /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
@@ -236,20 +215,20 @@ MatrixBase<Derived>::diagonal(Index index) const
  *
  * \sa MatrixBase::diagonal(), class Diagonal */
 template<typename Derived>
-template<int Index_>
-inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
+template<int Index>
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type
 MatrixBase<Derived>::diagonal()
 {
-  return typename DiagonalIndexReturnType<Index_>::Type(derived());
+  return derived();
 }

 /** This is the const version of diagonal<int>(). */
 template<typename Derived>
-template<int Index_>
-inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
+template<int Index>
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type
 MatrixBase<Derived>::diagonal() const
 {
-  return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
+  return derived();
 }

 } // end namespace Eigen
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -22,7 +22,7 @@ class DiagonalBase : public EigenBase<Derived>
    typedef typename DiagonalVectorType::Scalar Scalar;
    typedef typename DiagonalVectorType::RealScalar RealScalar;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+    typedef typename internal::traits<Derived>::Index Index;

    enum {
      RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
@@ -30,62 +30,74 @@ class DiagonalBase : public EigenBase<Derived>
      MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
      MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
      IsVectorAtCompileTime = 0,
-      Flags = NoPreferredStorageOrderBit
+      Flags = 0
    };

    typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
    typedef DenseMatrixType DenseType;
    typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;

-    EIGEN_DEVICE_FUNC
    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
-    EIGEN_DEVICE_FUNC
    inline Derived& derived() { return *static_cast<Derived*>(this); }

-    EIGEN_DEVICE_FUNC
    DenseMatrixType toDenseMatrix() const { return derived(); }
-    
-    EIGEN_DEVICE_FUNC
+    template<typename DenseDerived>
+    void evalTo(MatrixBase<DenseDerived> &other) const;
+    template<typename DenseDerived>
+    void addTo(MatrixBase<DenseDerived> &other) const
+    { other.diagonal() += diagonal(); }
+    template<typename DenseDerived>
+    void subTo(MatrixBase<DenseDerived> &other) const
+    { other.diagonal() -= diagonal(); }
+
    inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
-    EIGEN_DEVICE_FUNC
    inline DiagonalVectorType& diagonal() { return derived().diagonal(); }

-    EIGEN_DEVICE_FUNC
    inline Index rows() const { return diagonal().size(); }
-    EIGEN_DEVICE_FUNC
    inline Index cols() const { return diagonal().size(); }

    template<typename MatrixDerived>
-    EIGEN_DEVICE_FUNC
-    const Product<Derived,MatrixDerived,LazyProduct>
-    operator*(const MatrixBase<MatrixDerived> &matrix) const
-    {
-      return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
-    }
+    const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
+    operator*(const MatrixBase<MatrixDerived> &matrix) const;

-    typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;
-    EIGEN_DEVICE_FUNC
-    inline const InverseReturnType
+    inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
    inverse() const
    {
-      return InverseReturnType(diagonal().cwiseInverse());
+      return diagonal().cwiseInverse();
    }
    
-    typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > ScalarMultipleReturnType;
-    EIGEN_DEVICE_FUNC
-    inline const ScalarMultipleReturnType
+    inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
    operator*(const Scalar& scalar) const
    {
-      return ScalarMultipleReturnType(diagonal() * scalar);
+      return diagonal() * scalar;
    }
-    EIGEN_DEVICE_FUNC
-    friend inline const ScalarMultipleReturnType
+    friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
    operator*(const Scalar& scalar, const DiagonalBase& other)
    {
-      return ScalarMultipleReturnType(other.diagonal() * scalar);
+      return other.diagonal() * scalar;
    }
+    
+    #ifdef EIGEN2_SUPPORT
+    template<typename OtherDerived>
+    bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
+    {
+      return diagonal().isApprox(other.diagonal(), precision);
+    }
+    template<typename OtherDerived>
+    bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
+    {
+      return toDenseMatrix().isApprox(other, precision);
+    }
+    #endif
 };

+template<typename Derived>
+template<typename DenseDerived>
+void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
+{
+  other.setZero();
+  other.diagonal() = diagonal();
+}
 #endif

 /** \class DiagonalMatrix
@@ -107,9 +119,10 @@ struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
 : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
  typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
-  typedef DiagonalShape StorageKind;
+  typedef Dense StorageKind;
+  typedef DenseIndex Index;
  enum {
-    Flags = LvalueBit | NoPreferredStorageOrderBit
+    Flags = LvalueBit
  };
 };
 }
@@ -123,7 +136,7 @@ class DiagonalMatrix
    typedef const DiagonalMatrix& Nested;
    typedef _Scalar Scalar;
    typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
-    typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;
+    typedef typename internal::traits<DiagonalMatrix>::Index Index;
    #endif

  protected:
@@ -133,31 +146,24 @@ class DiagonalMatrix
  public:

    /** const version of diagonal(). */
-    EIGEN_DEVICE_FUNC
    inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
    /** \returns a reference to the stored vector of diagonal coefficients. */
-    EIGEN_DEVICE_FUNC
    inline DiagonalVectorType& diagonal() { return m_diagonal; }

    /** Default constructor without initialization */
-    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix() {}

    /** Constructs a diagonal matrix with given dimension  */
-    EIGEN_DEVICE_FUNC
-    explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+    inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}

    /** 2D constructor. */
-    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}

    /** 3D constructor. */
-    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}

    /** Copy constructor. */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}

    #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -167,13 +173,11 @@ class DiagonalMatrix

    /** generic constructor from expression of the diagonal coefficients */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
    {}

    /** Copy operator. */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
    {
      m_diagonal = other.diagonal();
@@ -184,7 +188,6 @@ class DiagonalMatrix
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
-    EIGEN_DEVICE_FUNC
    DiagonalMatrix& operator=(const DiagonalMatrix& other)
    {
      m_diagonal = other.diagonal();
@@ -193,19 +196,14 @@ class DiagonalMatrix
    #endif

    /** Resizes to given size. */
-    EIGEN_DEVICE_FUNC
    inline void resize(Index size) { m_diagonal.resize(size); }
    /** Sets all coefficients to zero. */
-    EIGEN_DEVICE_FUNC
    inline void setZero() { m_diagonal.setZero(); }
    /** Resizes and sets all coefficients to zero. */
-    EIGEN_DEVICE_FUNC
    inline void setZero(Index size) { m_diagonal.setZero(size); }
    /** Sets this matrix to be the identity matrix of the current size. */
-    EIGEN_DEVICE_FUNC
    inline void setIdentity() { m_diagonal.setOnes(); }
    /** Sets this matrix to be the identity matrix of the given size. */
-    EIGEN_DEVICE_FUNC
    inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
 };

@@ -229,15 +227,14 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
 {
  typedef _DiagonalVectorType DiagonalVectorType;
  typedef typename DiagonalVectorType::Scalar Scalar;
-  typedef typename DiagonalVectorType::StorageIndex StorageIndex;
-  typedef DiagonalShape StorageKind;
-  typedef typename traits<DiagonalVectorType>::XprKind XprKind;
+  typedef typename DiagonalVectorType::Index Index;
+  typedef typename DiagonalVectorType::StorageKind StorageKind;
  enum {
    RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
    ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
-    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
-    Flags =  (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
+    MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    Flags =  traits<DiagonalVectorType>::Flags & LvalueBit
  };
 };
 }
@@ -253,11 +250,9 @@ class DiagonalWrapper
    #endif

    /** Constructor from expression of diagonal coefficients to wrap. */
-    EIGEN_DEVICE_FUNC
-    explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
+    inline DiagonalWrapper(DiagonalVectorType& diagonal) : m_diagonal(diagonal) {}

    /** \returns a const reference to the wrapped expression of diagonal coefficients. */
-    EIGEN_DEVICE_FUNC
    const DiagonalVectorType& diagonal() const { return m_diagonal; }

  protected:
@@ -277,7 +272,7 @@ template<typename Derived>
 inline const DiagonalWrapper<const Derived>
 MatrixBase<Derived>::asDiagonal() const
 {
-  return DiagonalWrapper<const Derived>(derived());
+  return derived();
 }

 /** \returns true if *this is approximately equal to a diagonal matrix,
@@ -289,14 +284,13 @@ MatrixBase<Derived>::asDiagonal() const
  * \sa asDiagonal()
  */
 template<typename Derived>
-bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
+bool MatrixBase<Derived>::isDiagonal(RealScalar prec) const
 {
-  using std::abs;
  if(cols() != rows()) return false;
  RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
  for(Index j = 0; j < cols(); ++j)
  {
-    RealScalar absOnDiagonal = abs(coeff(j,j));
+    RealScalar absOnDiagonal = internal::abs(coeff(j,j));
    if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
  }
  for(Index j = 0; j < cols(); ++j)
@@ -308,33 +302,6 @@ bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
  return true;
 }

-namespace internal {
-
-template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };
-
-struct Diagonal2Dense {};
-
-template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
-
-// Diagonal matrix to Dense assignment
-template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
-struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense, Scalar>
-{
-  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
-  {
-    dst.setZero();
-    dst.diagonal() = src.diagonal();
-  }
-  
-  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/)
-  { dst.diagonal() += src.diagonal(); }
-  
-  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/)
-  { dst.diagonal() -= src.diagonal(); }
-};
-
-} // namespace internal
-
 } // end namespace Eigen

 #endif // EIGEN_DIAGONALMATRIX_H
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -13,14 +13,109 @@

 namespace Eigen { 

+namespace internal {
+template<typename MatrixType, typename DiagonalType, int ProductOrder>
+struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
+ : traits<MatrixType>
+{
+  typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+
+    _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
+    _PacketOnDiag = !((int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
+                    ||(int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)),
+    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+    // FIXME currently we need same types, but in the future the next rule should be the one
+    //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::Flags)&PacketAccessBit))),
+    _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && ((!_PacketOnDiag) || (bool(int(DiagonalType::Flags)&PacketAccessBit))),
+
+    Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),
+    CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
+  };
+};
+}
+
+template<typename MatrixType, typename DiagonalType, int ProductOrder>
+class DiagonalProduct : internal::no_assignment_operator,
+                        public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
+{
+  public:
+
+    typedef MatrixBase<DiagonalProduct> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct)
+
+    inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
+      : m_matrix(matrix), m_diagonal(diagonal)
+    {
+      eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
+    }
+
+    inline Index rows() const { return m_matrix.rows(); }
+    inline Index cols() const { return m_matrix.cols(); }
+
+    const Scalar coeff(Index row, Index col) const
+    {
+      return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col);
+    }
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+    {
+      enum {
+        StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
+      };
+      const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
+
+      return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
+        ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
+       ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
+    }
+
+  protected:
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
+    {
+      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
+                     internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
+    }
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
+    {
+      enum {
+        InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
+        DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned
+      };
+      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
+                     m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
+    }
+
+    typename MatrixType::Nested m_matrix;
+    typename DiagonalType::Nested m_diagonal;
+};
+
 /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
  */
 template<typename Derived>
 template<typename DiagonalDerived>
-inline const Product<Derived, DiagonalDerived, LazyProduct>
-MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
+inline const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &diagonal) const
 {
-  return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());
+  return DiagonalProduct<Derived, DiagonalDerived, OnTheRight>(derived(), diagonal.derived());
+}
+
+/** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
+  */
+template<typename DiagonalDerived>
+template<typename MatrixDerived>
+inline const DiagonalProduct<MatrixDerived, DiagonalDerived, OnTheLeft>
+DiagonalBase<DiagonalDerived>::operator*(const MatrixBase<MatrixDerived> &matrix) const
+{
+  return DiagonalProduct<MatrixDerived, DiagonalDerived, OnTheLeft>(matrix.derived(), derived());
 }

 } // end namespace Eigen
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -29,7 +29,6 @@ template<typename T, typename U,
 struct dot_nocheck
 {
  typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
-  EIGEN_DEVICE_FUNC
  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
    return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
@@ -40,7 +39,6 @@ template<typename T, typename U>
 struct dot_nocheck<T, U, true>
 {
  typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
-  EIGEN_DEVICE_FUNC
  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
    return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
@@ -61,7 +59,6 @@ struct dot_nocheck<T, U, true>
  */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_DEVICE_FUNC
 typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
@@ -76,6 +73,34 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
  return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
 }

+#ifdef EIGEN2_SUPPORT
+/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
+  * (conjugating the second variable). Of course this only makes a difference in the complex case.
+  *
+  * This method is only available in EIGEN2_SUPPORT mode.
+  *
+  * \only_for_vectors
+  *
+  * \sa dot()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+typename internal::traits<Derived>::Scalar
+MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
+  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+  eigen_assert(size() == other.size());
+
+  return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
+}
+#endif
+
+
 //---------- implementation of L2 norm and related functions ----------

 /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
@@ -87,7 +112,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 template<typename Derived>
 EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
 {
-  return numext::real((*this).cwiseAbs2().sum());
+  return internal::real((*this).cwiseAbs2().sum());
 }

 /** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
@@ -99,8 +124,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
 template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
-  EIGEN_USING_STD_MATH(sqrt)
-  return sqrt(squaredNorm());
+  return internal::sqrt(squaredNorm());
 }

 /** \returns an expression of the quotient of *this by its own norm.
@@ -113,7 +137,8 @@ template<typename Derived>
 inline const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
-  typedef typename internal::nested_eval<Derived,2>::type _Nested;
+  typedef typename internal::nested<Derived>::type Nested;
+  typedef typename internal::remove_reference<Nested>::type _Nested;
  _Nested n(derived());
  return n / n.norm();
 }
@@ -138,10 +163,8 @@ template<typename Derived, int p>
 struct lpNorm_selector
 {
  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC
  static inline RealScalar run(const MatrixBase<Derived>& m)
  {
-    EIGEN_USING_STD_MATH(pow)
    return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
  }
 };
@@ -149,7 +172,6 @@ struct lpNorm_selector
 template<typename Derived>
 struct lpNorm_selector<Derived, 1>
 {
-  EIGEN_DEVICE_FUNC
  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.cwiseAbs().sum();
@@ -159,7 +181,6 @@ struct lpNorm_selector<Derived, 1>
 template<typename Derived>
 struct lpNorm_selector<Derived, 2>
 {
-  EIGEN_DEVICE_FUNC
  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.norm();
@@ -169,7 +190,6 @@ struct lpNorm_selector<Derived, 2>
 template<typename Derived>
 struct lpNorm_selector<Derived, Infinity>
 {
-  EIGEN_DEVICE_FUNC
  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.cwiseAbs().maxCoeff();
@@ -203,11 +223,11 @@ MatrixBase<Derived>::lpNorm() const
 template<typename Derived>
 template<typename OtherDerived>
 bool MatrixBase<Derived>::isOrthogonal
-(const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
+(const MatrixBase<OtherDerived>& other, RealScalar prec) const
 {
-  typename internal::nested_eval<Derived,2>::type nested(derived());
-  typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());
-  return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
+  typename internal::nested<Derived,2>::type nested(derived());
+  typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
+  return internal::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
 }

 /** \returns true if *this is approximately an unitary matrix,
@@ -222,15 +242,15 @@ bool MatrixBase<Derived>::isOrthogonal
  * Output: \verbinclude MatrixBase_isUnitary.out
  */
 template<typename Derived>
-bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
+bool MatrixBase<Derived>::isUnitary(RealScalar prec) const
 {
-  typename internal::nested_eval<Derived,1>::type self(derived());
+  typename Derived::Nested nested(derived());
  for(Index i = 0; i < cols(); ++i)
  {
-    if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
+    if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
      return false;
    for(Index j = 0; j < i; ++j)
-      if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
+      if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
        return false;
  }
  return true;
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -13,9 +13,7 @@

 namespace Eigen {

-/** \class EigenBase
-  * 
-  * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
+/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
  *
  * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
  *
@@ -28,52 +26,34 @@ namespace Eigen {
 template<typename Derived> struct EigenBase
 {
 //   typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
-  
-  /** \brief The interface type of indices
-    * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
-    * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead.
-    * \sa StorageIndex, \ref TopicPreprocessorDirectives.
-    */
-  typedef Eigen::Index Index;

-  // FIXME is it needed?
  typedef typename internal::traits<Derived>::StorageKind StorageKind;
+  typedef typename internal::traits<Derived>::Index Index;

  /** \returns a reference to the derived object */
-  EIGEN_DEVICE_FUNC
  Derived& derived() { return *static_cast<Derived*>(this); }
  /** \returns a const reference to the derived object */
-  EIGEN_DEVICE_FUNC
  const Derived& derived() const { return *static_cast<const Derived*>(this); }

-  EIGEN_DEVICE_FUNC
  inline Derived& const_cast_derived() const
  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
-  EIGEN_DEVICE_FUNC
  inline const Derived& const_derived() const
  { return *static_cast<const Derived*>(this); }

  /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
-  EIGEN_DEVICE_FUNC
  inline Index rows() const { return derived().rows(); }
  /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
-  EIGEN_DEVICE_FUNC
  inline Index cols() const { return derived().cols(); }
  /** \returns the number of coefficients, which is rows()*cols().
    * \sa rows(), cols(), SizeAtCompileTime. */
-  EIGEN_DEVICE_FUNC
  inline Index size() const { return rows() * cols(); }

  /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
-  template<typename Dest>
-  EIGEN_DEVICE_FUNC
-  inline void evalTo(Dest& dst) const
+  template<typename Dest> inline void evalTo(Dest& dst) const
  { derived().evalTo(dst); }

  /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
-  template<typename Dest>
-  EIGEN_DEVICE_FUNC
-  inline void addTo(Dest& dst) const
+  template<typename Dest> inline void addTo(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -83,9 +63,7 @@ template<typename Derived> struct EigenBase
  }

  /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
-  template<typename Dest>
-  EIGEN_DEVICE_FUNC
-  inline void subTo(Dest& dst) const
+  template<typename Dest> inline void subTo(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -95,8 +73,7 @@ template<typename Derived> struct EigenBase
  }

  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
-  template<typename Dest>
-  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
+  template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -104,8 +81,7 @@ template<typename Derived> struct EigenBase
  }

  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
-  template<typename Dest>
-  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
+  template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -130,7 +106,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 {
-  call_assignment(derived(), other.derived());
+  other.derived().evalTo(derived());
  return derived();
 }

@@ -138,7 +114,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 {
-  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
+  other.derived().addTo(derived());
  return derived();
 }

@@ -146,10 +122,39 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
 {
-  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
+  other.derived().subTo(derived());
  return derived();
 }

+/** replaces \c *this by \c *this * \a other.
+  *
+  * \returns a reference to \c *this
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived&
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+  return derived();
+}
+
+/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=() */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+}
+
+/** replaces \c *this by \c *this * \a other. */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheLeft(derived());
+}
+
 } // end namespace Eigen

 #endif // EIGEN_EIGENBASE_H
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -0,0 +1,140 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FLAGGED_H
+#define EIGEN_FLAGGED_H
+
+namespace Eigen { 
+
+/** \class Flagged
+  * \ingroup Core_Module
+  *
+  * \brief Expression with modified flags
+  *
+  * \param ExpressionType the type of the object of which we are modifying the flags
+  * \param Added the flags added to the expression
+  * \param Removed the flags removed from the expression (has priority over Added).
+  *
+  * This class represents an expression whose flags have been modified.
+  * It is the return type of MatrixBase::flagged()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::flagged()
+  */
+
+namespace internal {
+template<typename ExpressionType, unsigned int Added, unsigned int Removed>
+struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
+{
+  enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
+};
+}
+
+template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
+  : public MatrixBase<Flagged<ExpressionType, Added, Removed> >
+{
+  public:
+
+    typedef MatrixBase<Flagged> Base;
+    
+    EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
+    typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
+        ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
+    typedef typename ExpressionType::InnerIterator InnerIterator;
+
+    inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
+
+    inline Index rows() const { return m_matrix.rows(); }
+    inline Index cols() const { return m_matrix.cols(); }
+    inline Index outerStride() const { return m_matrix.outerStride(); }
+    inline Index innerStride() const { return m_matrix.innerStride(); }
+
+    inline CoeffReturnType coeff(Index row, Index col) const
+    {
+      return m_matrix.coeff(row, col);
+    }
+
+    inline CoeffReturnType coeff(Index index) const
+    {
+      return m_matrix.coeff(index);
+    }
+    
+    inline const Scalar& coeffRef(Index row, Index col) const
+    {
+      return m_matrix.const_cast_derived().coeffRef(row, col);
+    }
+
+    inline const Scalar& coeffRef(Index index) const
+    {
+      return m_matrix.const_cast_derived().coeffRef(index);
+    }
+
+    inline Scalar& coeffRef(Index row, Index col)
+    {
+      return m_matrix.const_cast_derived().coeffRef(row, col);
+    }
+
+    inline Scalar& coeffRef(Index index)
+    {
+      return m_matrix.const_cast_derived().coeffRef(index);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index row, Index col) const
+    {
+      return m_matrix.template packet<LoadMode>(row, col);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
+    {
+      m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index index) const
+    {
+      return m_matrix.template packet<LoadMode>(index);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index index, const PacketScalar& x)
+    {
+      m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
+    }
+
+    const ExpressionType& _expression() const { return m_matrix; }
+
+    template<typename OtherDerived>
+    typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
+
+    template<typename OtherDerived>
+    void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
+
+  protected:
+    ExpressionTypeNested m_matrix;
+};
+
+/** \returns an expression of *this with added and removed flags
+  *
+  * This is mostly for internal use.
+  *
+  * \sa class Flagged
+  */
+template<typename Derived>
+template<unsigned int Added,unsigned int Removed>
+inline const Flagged<Derived, Added, Removed>
+DenseBase<Derived>::flagged() const
+{
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_FLAGGED_H
--- a/Eigen/src/Core/ForceAlignedAccess.h
+++ b/Eigen/src/Core/ForceAlignedAccess.h
@@ -39,29 +39,29 @@ template<typename ExpressionType> class ForceAlignedAccess
    typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)

-    EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+    inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
-    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
-    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
+    inline Index rows() const { return m_expression.rows(); }
+    inline Index cols() const { return m_expression.cols(); }
+    inline Index outerStride() const { return m_expression.outerStride(); }
+    inline Index innerStride() const { return m_expression.innerStride(); }

-    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
+    inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
    }

-    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
+    inline Scalar& coeffRef(Index row, Index col)
    {
      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
+    inline const CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }

-    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
+    inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }
@@ -90,7 +90,7 @@ template<typename ExpressionType> class ForceAlignedAccess
      m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
    }

-    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
+    operator const ExpressionType&() const { return m_expression; }

  protected:
    const ExpressionType& m_expression;
@@ -127,7 +127,7 @@ template<bool Enable>
 inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
 MatrixBase<Derived>::forceAlignedAccessIf() const
 {
-  return derived();  // FIXME This should not work but apparently is never used
+  return derived();
 }

 /** \returns an expression of *this with forced aligned access if \a Enable is true.
@@ -138,7 +138,7 @@ template<bool Enable>
 inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
 MatrixBase<Derived>::forceAlignedAccessIf()
 {
-  return derived();  // FIXME This should not work but apparently is never used
+  return derived();
 }

 } // end namespace Eigen
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -0,0 +1,975 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FUNCTORS_H
+#define EIGEN_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// associative functors:
+
+/** \internal
+  * \brief Template functor to compute the sum of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum()
+  */
+template<typename Scalar> struct scalar_sum_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::padd(a,b); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  { return internal::predux(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sum_op<Scalar> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasAdd
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the product of two scalars
+  *
+  * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
+  */
+template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
+  enum {
+    // TODO vectorize mixed product
+    Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
+  };
+  typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pmul(a,b); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+  { return internal::predux_mul(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
+    PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the conjugate product of two scalars
+  *
+  * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
+  */
+template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
+
+  enum {
+    Conj = NumTraits<LhsScalar>::IsComplex
+  };
+  
+  typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
+  
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
+  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+  { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
+  
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = NumTraits<LhsScalar>::MulCost,
+    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the min of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
+  */
+template<typename Scalar> struct scalar_min_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pmin(a,b); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  { return internal::predux_min(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_min_op<Scalar> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasMin
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the max of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
+  */
+template<typename Scalar> struct scalar_max_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pmax(a,b); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
+  { return internal::predux_max(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_max_op<Scalar> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasMax
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the hypot of two scalars
+  *
+  * \sa MatrixBase::stableNorm(), class Redux
+  */
+template<typename Scalar> struct scalar_hypot_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
+//   typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+  {
+    using std::max;
+    using std::min;
+    Scalar p = (max)(_x, _y);
+    Scalar q = (min)(_x, _y);
+    Scalar qp = q/p;
+    return p * sqrt(Scalar(1) + qp*qp);
+  }
+};
+template<typename Scalar>
+struct functor_traits<scalar_hypot_op<Scalar> > {
+  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
+};
+
+/** \internal
+  * \brief Template functor to compute the pow of two scalars
+  */
+template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
+  inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return internal::pow(a, b); }
+};
+template<typename Scalar, typename OtherScalar>
+struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
+  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+
+// other binary functors:
+
+/** \internal
+  * \brief Template functor to compute the difference of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::operator-
+  */
+template<typename Scalar> struct scalar_difference_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::psub(a,b); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_difference_op<Scalar> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasSub
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the quotient of two scalars
+  *
+  * \sa class CwiseBinaryOp, Cwise::operator/()
+  */
+template<typename Scalar> struct scalar_quotient_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pdiv(a,b); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_quotient_op<Scalar> > {
+  enum {
+    Cost = 2 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasDiv
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the and of two booleans
+  *
+  * \sa class CwiseBinaryOp, ArrayBase::operator&&
+  */
+struct scalar_boolean_and_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
+  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+};
+template<> struct functor_traits<scalar_boolean_and_op> {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the or of two booleans
+  *
+  * \sa class CwiseBinaryOp, ArrayBase::operator||
+  */
+struct scalar_boolean_or_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
+  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+};
+template<> struct functor_traits<scalar_boolean_or_op> {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
+// unary functors:
+
+/** \internal
+  * \brief Template functor to compute the opposite of a scalar
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::operator-
+  */
+template<typename Scalar> struct scalar_opposite_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pnegate(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_opposite_op<Scalar> >
+{ enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasNegate };
+};
+
+/** \internal
+  * \brief Template functor to compute the absolute value of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::abs
+  */
+template<typename Scalar> struct scalar_abs_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs(a); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pabs(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasAbs
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the squared absolute value of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::abs2
+  */
+template<typename Scalar> struct scalar_abs2_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return internal::abs2(a); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs2_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
+
+/** \internal
+  * \brief Template functor to compute the conjugate of a complex value
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::conjugate()
+  */
+template<typename Scalar> struct scalar_conjugate_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return internal::conj(a); }
+  template<typename Packet>
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_conjugate_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
+    PacketAccess = packet_traits<Scalar>::HasConj
+  };
+};
+
+/** \internal
+  * \brief Template functor to cast a scalar to another type
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::cast()
+  */
+template<typename Scalar, typename NewType>
+struct scalar_cast_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+  typedef NewType result_type;
+  EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+};
+template<typename Scalar, typename NewType>
+struct functor_traits<scalar_cast_op<Scalar,NewType> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the real part of a complex
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::real()
+  */
+template<typename Scalar>
+struct scalar_real_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::real(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the imaginary part of a complex
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::imag()
+  */
+template<typename Scalar>
+struct scalar_imag_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return internal::imag(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the real part of a complex as a reference
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::real()
+  */
+template<typename Scalar>
+struct scalar_real_ref_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::real_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the imaginary part of a complex as a reference
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::imag()
+  */
+template<typename Scalar>
+struct scalar_imag_ref_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return internal::imag_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  *
+  * \brief Template functor to compute the exponential of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::exp()
+  */
+template<typename Scalar> struct scalar_exp_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
+  inline const Scalar operator() (const Scalar& a) const { return internal::exp(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_exp_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
+
+/** \internal
+  *
+  * \brief Template functor to compute the logarithm of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::log()
+  */
+template<typename Scalar> struct scalar_log_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
+  inline const Scalar operator() (const Scalar& a) const { return internal::log(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_log_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
+
+/** \internal
+  * \brief Template functor to multiply a scalar by a fixed other one
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
+  */
+/* NOTE why doing the pset1() in packetOp *is* an optimization ?
+ * indeed it seems better to declare m_other as a Packet and do the pset1() once
+ * in the constructor. However, in practice:
+ *  - GCC does not like m_other as a Packet and generate a load every time it needs it
+ *  - on the other hand GCC is able to moves the pset1() outside the loop :)
+ *  - simpler code ;)
+ * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
+ */
+template<typename Scalar>
+struct scalar_multiple_op {
+  typedef typename packet_traits<Scalar>::type Packet;
+  // FIXME default copy constructors seems bugged with std::complex<>
+  EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
+  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pmul(a, pset1<Packet>(m_other)); }
+  typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_multiple_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+template<typename Scalar1, typename Scalar2>
+struct scalar_multiple2_op {
+  typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
+  EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
+  typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
+};
+template<typename Scalar1,typename Scalar2>
+struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
+{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to divide a scalar by a fixed other one
+  *
+  * This functor is used to implement the quotient of a matrix by
+  * a scalar where the scalar type is not necessarily a floating point type.
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::operator/
+  */
+template<typename Scalar>
+struct scalar_quotient1_op {
+  typedef typename packet_traits<Scalar>::type Packet;
+  // FIXME default copy constructors seems bugged with std::complex<>
+  EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
+  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
+  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pdiv(a, pset1<Packet>(m_other)); }
+  typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_quotient1_op<Scalar> >
+{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+// nullary functors
+
+template<typename Scalar>
+struct scalar_constant_op {
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
+  const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_constant_op<Scalar> >
+// FIXME replace this packet test by a safe one
+{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
+
+template<typename Scalar> struct scalar_identity_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_identity_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
+
+template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
+
+// linear access for packet ops:
+// 1) initialization
+//   base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
+// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
+//   base += [size*step, ..., size*step]
+//
+// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
+//       in order to avoid the padd() in operator() ?
+template <typename Scalar>
+struct linspaced_op_impl<Scalar,false>
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+
+  linspaced_op_impl(Scalar low, Scalar step) :
+  m_low(low), m_step(step),
+  m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
+  m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
+
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const 
+  { 
+    m_base = padd(m_base, pset1<Packet>(m_step));
+    return m_low+i*m_step; 
+  }
+
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
+
+  const Scalar m_low;
+  const Scalar m_step;
+  const Packet m_packetStep;
+  mutable Packet m_base;
+};
+
+// random access for packet ops:
+// 1) each step
+//   [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
+template <typename Scalar>
+struct linspaced_op_impl<Scalar,true>
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+
+  linspaced_op_impl(Scalar low, Scalar step) :
+  m_low(low), m_step(step),
+  m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
+
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
+  { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(i),m_interPacket))); }
+
+  const Scalar m_low;
+  const Scalar m_step;
+  const Packet m_lowPacket;
+  const Packet m_stepPacket;
+  const Packet m_interPacket;
+};
+
+// ----- Linspace functor ----------------------------------------------------------------
+
+// Forward declaration (we default to random access which does not really give
+// us a speed gain when using packet access but it allows to use the functor in
+// nested expressions).
+template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
+template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
+{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
+template <typename Scalar, bool RandomAccess> struct linspaced_op
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+  linspaced_op(Scalar low, Scalar high, int num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
+
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
+
+  // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
+  // there row==0 and col is used for the actual iteration.
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const 
+  {
+    eigen_assert(col==0 || row==0);
+    return impl(col + row);
+  }
+
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
+
+  // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
+  // there row==0 and col is used for the actual iteration.
+  template<typename Index>
+  EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
+  {
+    eigen_assert(col==0 || row==0);
+    return impl.packetOp(col + row);
+  }
+
+  // This proxy object handles the actual required temporaries, the different
+  // implementations (random vs. sequential access) as well as the
+  // correct piping to size 2/4 packet operations.
+  const linspaced_op_impl<Scalar,RandomAccess> impl;
+};
+
+// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
+// to indicate whether a functor allows linear access, just always answering 'yes' except for
+// scalar_identity_op.
+// FIXME move this to functor_traits adding a functor_default
+template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
+template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
+
+// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
+// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
+// FIXME move this to functor_traits adding a functor_default
+template<typename Functor> struct functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+
+
+/** \internal
+  * \brief Template functor to add a scalar to a fixed other one
+  * \sa class CwiseUnaryOp, Array::operator+
+  */
+/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
+template<typename Scalar>
+struct scalar_add_op {
+  typedef typename packet_traits<Scalar>::type Packet;
+  // FIXME default copy constructors seems bugged with std::complex<>
+  inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
+  inline scalar_add_op(const Scalar& other) : m_other(other) { }
+  inline Scalar operator() (const Scalar& a) const { return a + m_other; }
+  inline const Packet packetOp(const Packet& a) const
+  { return internal::padd(a, pset1<Packet>(m_other)); }
+  const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_add_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
+
+/** \internal
+  * \brief Template functor to compute the square root of a scalar
+  * \sa class CwiseUnaryOp, Cwise::sqrt()
+  */
+template<typename Scalar> struct scalar_sqrt_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
+  inline const Scalar operator() (const Scalar& a) const { return internal::sqrt(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sqrt_op<Scalar> >
+{ enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasSqrt
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the cosine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::cos()
+  */
+template<typename Scalar> struct scalar_cos_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
+  inline Scalar operator() (const Scalar& a) const { return internal::cos(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cos_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasCos
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the sine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::sin()
+  */
+template<typename Scalar> struct scalar_sin_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
+  inline const Scalar operator() (const Scalar& a) const { return internal::sin(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sin_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasSin
+  };
+};
+
+
+/** \internal
+  * \brief Template functor to compute the tan of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::tan()
+  */
+template<typename Scalar> struct scalar_tan_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
+  inline const Scalar operator() (const Scalar& a) const { return internal::tan(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_tan_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasTan
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the arc cosine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::acos()
+  */
+template<typename Scalar> struct scalar_acos_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
+  inline const Scalar operator() (const Scalar& a) const { return internal::acos(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_acos_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasACos
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the arc sine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::asin()
+  */
+template<typename Scalar> struct scalar_asin_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
+  inline const Scalar operator() (const Scalar& a) const { return internal::asin(a); }
+  typedef typename packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_asin_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasASin
+  };
+};
+
+/** \internal
+  * \brief Template functor to raise a scalar to a power
+  * \sa class CwiseUnaryOp, Cwise::pow
+  */
+template<typename Scalar>
+struct scalar_pow_op {
+  // FIXME default copy constructors seems bugged with std::complex<>
+  inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
+  inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
+  inline Scalar operator() (const Scalar& a) const { return internal::pow(a, m_exponent); }
+  const Scalar m_exponent;
+};
+template<typename Scalar>
+struct functor_traits<scalar_pow_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to compute the quotient between a scalar and array entries.
+  * \sa class CwiseUnaryOp, Cwise::inverse()
+  */
+template<typename Scalar>
+struct scalar_inverse_mult_op {
+  scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
+  inline Scalar operator() (const Scalar& a) const { return m_other / a; }
+  template<typename Packet>
+  inline const Packet packetOp(const Packet& a) const
+  { return internal::pdiv(pset1<Packet>(m_other),a); }
+  Scalar m_other;
+};
+
+/** \internal
+  * \brief Template functor to compute the inverse of a scalar
+  * \sa class CwiseUnaryOp, Cwise::inverse()
+  */
+template<typename Scalar>
+struct scalar_inverse_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
+  inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
+  template<typename Packet>
+  inline const Packet packetOp(const Packet& a) const
+  { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+/** \internal
+  * \brief Template functor to compute the square of a scalar
+  * \sa class CwiseUnaryOp, Cwise::square()
+  */
+template<typename Scalar>
+struct scalar_square_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
+  inline Scalar operator() (const Scalar& a) const { return a*a; }
+  template<typename Packet>
+  inline const Packet packetOp(const Packet& a) const
+  { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_square_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+/** \internal
+  * \brief Template functor to compute the cube of a scalar
+  * \sa class CwiseUnaryOp, Cwise::cube()
+  */
+template<typename Scalar>
+struct scalar_cube_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
+  inline Scalar operator() (const Scalar& a) const { return a*a*a; }
+  template<typename Packet>
+  inline const Packet packetOp(const Packet& a) const
+  { return internal::pmul(a,pmul(a,a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cube_op<Scalar> >
+{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+// default functor traits for STL functors:
+
+template<typename T>
+struct functor_traits<std::multiplies<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::divides<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::plus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::minus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::negate<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_or<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_and<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_not<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::not_equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binder2nd<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binder1st<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::unary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+
+#ifdef EIGEN_STDEXT_SUPPORT
+
+template<typename T0,typename T1>
+struct functor_traits<std::project1st<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::project2nd<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select2nd<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select1st<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::unary_compose<T0,T1> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
+
+template<typename T0,typename T1,typename T2>
+struct functor_traits<std::binary_compose<T0,T1,T2> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
+
+#endif // EIGEN_STDEXT_SUPPORT
+
+// allow to add new functors and specializations of functor_traits from outside Eigen.
+// this macro is really needed because functor_traits must be specialized after it is declared but before it is used...
+#ifdef EIGEN_FUNCTORS_PLUGIN
+#include EIGEN_FUNCTORS_PLUGIN
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_FUNCTORS_H
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -19,20 +19,19 @@ namespace internal
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isApprox_selector
 {
-  EIGEN_DEVICE_FUNC
-  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
+  static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
  {
-    typename internal::nested_eval<Derived,2>::type nested(x);
-    typename internal::nested_eval<OtherDerived,2>::type otherNested(y);
-    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
+    using std::min;
+    typename internal::nested<Derived,2>::type nested(x);
+    typename internal::nested<OtherDerived,2>::type otherNested(y);
+    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
  }
 };

 template<typename Derived, typename OtherDerived>
 struct isApprox_selector<Derived, OtherDerived, true>
 {
-  EIGEN_DEVICE_FUNC
-  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
+  static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar)
  {
    return x.matrix() == y.matrix();
  }
@@ -41,18 +40,16 @@ struct isApprox_selector<Derived, OtherDerived, true>
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_object_selector
 {
-  EIGEN_DEVICE_FUNC
-  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
+  static bool run(const Derived& x, const OtherDerived& y, typename Derived::RealScalar prec)
  {
-    return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
+    return x.cwiseAbs2().sum() <= abs2(prec) * y.cwiseAbs2().sum();
  }
 };

 template<typename Derived, typename OtherDerived>
 struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 {
-  EIGEN_DEVICE_FUNC
-  static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
+  static bool run(const Derived& x, const OtherDerived&, typename Derived::RealScalar)
  {
    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
  }
@@ -61,18 +58,16 @@ struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_scalar_selector
 {
-  EIGEN_DEVICE_FUNC
-  static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
+  static bool run(const Derived& x, const typename Derived::RealScalar& y, typename Derived::RealScalar prec)
  {
-    return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
+    return x.cwiseAbs2().sum() <= abs2(prec * y);
  }
 };

 template<typename Derived>
 struct isMuchSmallerThan_scalar_selector<Derived, true>
 {
-  EIGEN_DEVICE_FUNC
-  static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
+  static bool run(const Derived& x, const typename Derived::RealScalar&, typename Derived::RealScalar)
  {
    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
  }
@@ -102,7 +97,7 @@ template<typename Derived>
 template<typename OtherDerived>
 bool DenseBase<Derived>::isApprox(
  const DenseBase<OtherDerived>& other,
-  const RealScalar& prec
+  RealScalar prec
 ) const
 {
  return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
@@ -124,7 +119,7 @@ bool DenseBase<Derived>::isApprox(
 template<typename Derived>
 bool DenseBase<Derived>::isMuchSmallerThan(
  const typename NumTraits<Scalar>::Real& other,
-  const RealScalar& prec
+  RealScalar prec
 ) const
 {
  return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);
@@ -144,7 +139,7 @@ template<typename Derived>
 template<typename OtherDerived>
 bool DenseBase<Derived>::isMuchSmallerThan(
  const DenseBase<OtherDerived>& other,
-  const RealScalar& prec
+  RealScalar prec
 ) const
 {
  return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -11,7 +11,29 @@
 #ifndef EIGEN_GENERAL_PRODUCT_H
 #define EIGEN_GENERAL_PRODUCT_H

-namespace Eigen {
+namespace Eigen { 
+
+/** \class GeneralProduct
+  * \ingroup Core_Module
+  *
+  * \brief Expression of the product of two general matrices or vectors
+  *
+  * \param LhsNested the type used to store the left-hand side
+  * \param RhsNested the type used to store the right-hand side
+  * \param ProductMode the type of the product
+  *
+  * This class represents an expression of the product of two general matrices.
+  * We call a general matrix, a dense matrix with full storage. For instance,
+  * This excludes triangular, selfadjoint, and sparse matrices.
+  * It is the return type of the operator* between general matrices. Its template
+  * arguments are determined automatically by ProductReturnType. Therefore,
+  * GeneralProduct should never be used direclty. To determine the result type of a
+  * function which involves a matrix product, use ProductReturnType::Type.
+  *
+  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+  */
+template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
+class GeneralProduct;

 enum {
  Large = 2,
@@ -37,14 +59,15 @@ template<typename Lhs, typename Rhs> struct product_type
  typedef typename remove_all<Lhs>::type _Lhs;
  typedef typename remove_all<Rhs>::type _Rhs;
  enum {
-    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
-    Rows    = traits<_Lhs>::RowsAtCompileTime,
-    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
-    Cols    = traits<_Rhs>::ColsAtCompileTime,
-    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
-                                           traits<_Rhs>::MaxRowsAtCompileTime),
-    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
-                                        traits<_Rhs>::RowsAtCompileTime)
+    MaxRows  = _Lhs::MaxRowsAtCompileTime,
+    Rows  = _Lhs::RowsAtCompileTime,
+    MaxCols  = _Rhs::MaxColsAtCompileTime,
+    Cols  = _Rhs::ColsAtCompileTime,
+    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
+                                           _Rhs::MaxRowsAtCompileTime),
+    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
+                                        _Rhs::RowsAtCompileTime),
+    LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
  };

  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
@@ -59,8 +82,7 @@ private:

 public:
  enum {
-    value = selector::ret,
-    ret = selector::ret
+    value = selector::ret
  };
 #ifdef EIGEN_DEBUG_PRODUCT
  static void debug()
@@ -76,31 +98,6 @@ public:
 #endif
 };

-// template<typename Lhs, typename Rhs> struct product_tag
-// {
-// private:
-//   
-//   typedef typename remove_all<Lhs>::type _Lhs;
-//   typedef typename remove_all<Rhs>::type _Rhs;
-//   enum {
-//     Rows  = _Lhs::RowsAtCompileTime,
-//     Cols  = _Rhs::ColsAtCompileTime,
-//     Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
-//   };
-// 
-//   enum {
-//     rows_select = Rows==1 ? int(Rows) : int(Large),
-//     cols_select = Cols==1 ? int(Cols) : int(Large),
-//     depth_select = Depth==1 ? int(Depth) : int(Large)
-//   };
-//   typedef product_type_selector<rows_select, cols_select, depth_select> selector;
-// 
-// public:
-//   enum {
-//     ret = selector::ret
-//   };
-// 
-// };

 /* The following allows to select the kind of product at compile time
 * based on the three dimensions of the product.
@@ -131,6 +128,54 @@ template<>              struct product_type_selector<Large,Large,Small>  { enum

 } // end namespace internal

+/** \class ProductReturnType
+  * \ingroup Core_Module
+  *
+  * \brief Helper class to get the correct and optimized returned type of operator*
+  *
+  * \param Lhs the type of the left-hand side
+  * \param Rhs the type of the right-hand side
+  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
+  *
+  * This class defines the typename Type representing the optimized product expression
+  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
+  * is the recommended way to define the result type of a function returning an expression
+  * which involve a matrix product. The class Product should never be
+  * used directly.
+  *
+  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
+  */
+template<typename Lhs, typename Rhs, int ProductType>
+struct ProductReturnType
+{
+  // TODO use the nested type to reduce instanciations ????
+//   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+//   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+
+  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
+};
+
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
+{
+  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
+};
+
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{
+  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
+  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
+  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
+};
+
+// this is a workaround for sun CC
+template<typename Lhs, typename Rhs>
+struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
+{};
+
 /***********************************************************************
 *  Implementation of Inner Vector Vector Product
 ***********************************************************************/
@@ -142,10 +187,97 @@ template<>              struct product_type_selector<Large,Large,Small>  { enum
 // product ends up to a row-vector times col-vector product... To tackle this use
 // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);

+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
+ : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
+{};
+
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, InnerProduct>
+  : internal::no_assignment_operator,
+    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
+{
+    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
+  public:
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+    }
+
+    /** Convertion to scalar */
+    operator const typename Base::Scalar() const {
+      return Base::coeff(0,0);
+    }
+};
+
 /***********************************************************************
 *  Implementation of Outer Vector Vector Product
 ***********************************************************************/

+namespace internal {
+template<int StorageOrder> struct outer_product_selector;
+
+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
+{};
+
+}
+
+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, OuterProduct>
+  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
+{
+  public:
+    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    }
+
+    template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+    {
+      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
+    }
+};
+
+namespace internal {
+
+template<> struct outer_product_selector<ColMajor> {
+  template<typename ProductType, typename Dest>
+  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
+    typedef typename Dest::Index Index;
+    // FIXME make sure lhs is sequentially stored
+    // FIXME not very good if rhs is real and lhs complex while alpha is real too
+    const Index cols = dest.cols();
+    for (Index j=0; j<cols; ++j)
+      dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
+  }
+};
+
+template<> struct outer_product_selector<RowMajor> {
+  template<typename ProductType, typename Dest>
+  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
+    typedef typename Dest::Index Index;
+    // FIXME make sure rhs is sequentially stored
+    // FIXME not very good if lhs is real and rhs complex while alpha is real too
+    const Index rows = dest.rows();
+    for (Index i=0; i<rows; ++i)
+      dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
+  }
+};
+
+} // end namespace internal
+
 /***********************************************************************
 *  Implementation of General Matrix Vector Product
 ***********************************************************************/
@@ -159,13 +291,60 @@ template<>              struct product_type_selector<Large,Large,Small>  { enum
 */
 namespace internal {

+template<typename Lhs, typename Rhs>
+struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
+ : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
+{};
+
 template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_dense_sense_selector;
+struct gemv_selector;

 } // end namespace internal

+template<typename Lhs, typename Rhs>
+class GeneralProduct<Lhs, Rhs, GemvProduct>
+  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
+{
+  public:
+    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
+
+    typedef typename Lhs::Scalar LhsScalar;
+    typedef typename Rhs::Scalar RhsScalar;
+
+    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+    {
+//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
+//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    }
+
+    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
+
+    template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+    {
+      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
+      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
+    }
+};
+
 namespace internal {

+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+  {
+    Transpose<Dest> destT(dest);
+    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
+        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
+  }
+};
+
 template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;

 template<typename Scalar,int Size,int MaxSize>
@@ -183,7 +362,7 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
 template<typename Scalar,int Size,int MaxSize>
 struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
 {
-  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
+  #if EIGEN_ALIGN_STATICALLY
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
  #else
@@ -196,48 +375,33 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() {
    return ForceAlignment
-            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
+            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
            : m_data.array;
  }
  #endif
 };

-// The vector is on the left => transposition
-template<int StorageOrder, bool BlasCompatible>
-struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+template<> struct gemv_selector<OnTheRight,ColMajor,true>
 {
-  template<typename Lhs, typename Rhs, typename Dest>
-  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  template<typename ProductType, typename Dest>
+  static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
-    Transpose<Dest> destT(dest);
-    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    gemv_dense_sense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
-      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
-  }
-};
-
-template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
-{
-  template<typename Lhs, typename Rhs, typename Dest>
-  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
-  {
-    typedef typename Lhs::Scalar   LhsScalar;
-    typedef typename Rhs::Scalar   RhsScalar;
-    typedef typename Dest::Scalar  ResScalar;
-    typedef typename Dest::RealScalar  RealScalar;
-    
-    typedef internal::blas_traits<Lhs> LhsBlasTraits;
-    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
-    typedef internal::blas_traits<Rhs> RhsBlasTraits;
-    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
-  
+    typedef typename ProductType::Index Index;
+    typedef typename ProductType::LhsScalar   LhsScalar;
+    typedef typename ProductType::RhsScalar   RhsScalar;
+    typedef typename ProductType::Scalar      ResScalar;
+    typedef typename ProductType::RealScalar  RealScalar;
+    typedef typename ProductType::ActualLhsType ActualLhsType;
+    typedef typename ProductType::ActualRhsType ActualRhsType;
+    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;

-    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
-    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
+    ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
+    ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());

-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
-                                  * RhsBlasTraits::extractScalarFactor(rhs);
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -249,18 +413,18 @@ template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>

    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;

-    const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
-    const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
-
+    bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
+    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+    
    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
                                                  evalToDest ? dest.data() : static_dest.data());
-
+    
    if(!evalToDest)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      Index size = dest.size();
+      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
      if(!alphaIsCompatible)
@@ -272,13 +436,11 @@ template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
        MappedDest(actualDestPtr, dest.size()) = dest;
    }

-    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
-    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
    general_matrix_vector_product
-        <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
-        LhsMapper(actualLhs.data(), actualLhs.outerStride()),
-        RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+        actualLhs.data(), actualLhs.outerStride(),
+        actualRhs.data(), actualRhs.innerStride(),
        actualDestPtr, 1,
        compatibleAlpha);

@@ -292,34 +454,34 @@ template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
  }
 };

-template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,true>
+template<> struct gemv_selector<OnTheRight,RowMajor,true>
 {
-  template<typename Lhs, typename Rhs, typename Dest>
-  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
-    typedef typename Lhs::Scalar   LhsScalar;
-    typedef typename Rhs::Scalar   RhsScalar;
-    typedef typename Dest::Scalar  ResScalar;
-    
-    typedef internal::blas_traits<Lhs> LhsBlasTraits;
-    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
-    typedef internal::blas_traits<Rhs> RhsBlasTraits;
-    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
-    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+    typedef typename ProductType::LhsScalar LhsScalar;
+    typedef typename ProductType::RhsScalar RhsScalar;
+    typedef typename ProductType::Scalar    ResScalar;
+    typedef typename ProductType::Index Index;
+    typedef typename ProductType::ActualLhsType ActualLhsType;
+    typedef typename ProductType::ActualRhsType ActualRhsType;
+    typedef typename ProductType::_ActualRhsType _ActualRhsType;
+    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
+    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;

-    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
-    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());

-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
-                                  * RhsBlasTraits::extractScalarFactor(rhs);
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
+                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
-      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
+      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
    };

-    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
@@ -327,45 +489,45 @@ template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,true>
    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      Index size = actualRhs.size();
+      int size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }

-    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
-    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
    general_matrix_vector_product
-        <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
-        LhsMapper(actualLhs.data(), actualLhs.outerStride()),
-        RhsMapper(actualRhsPtr, 1),
+        actualLhs.data(), actualLhs.outerStride(),
+        actualRhsPtr, 1,
        dest.data(), dest.innerStride(),
        actualAlpha);
  }
 };

-template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,false>
+template<> struct gemv_selector<OnTheRight,ColMajor,false>
 {
-  template<typename Lhs, typename Rhs, typename Dest>
-  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
+    typedef typename Dest::Index Index;
    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
-    const Index size = rhs.rows();
+    const Index size = prod.rhs().rows();
    for(Index k=0; k<size; ++k)
-      dest += (alpha*rhs.coeff(k)) * lhs.col(k);
+      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
  }
 };

-template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,false>
+template<> struct gemv_selector<OnTheRight,RowMajor,false>
 {
-  template<typename Lhs, typename Rhs, typename Dest>
-  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  template<typename ProductType, typename Dest>
+  static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
  {
+    typedef typename Dest::Index Index;
    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
-    const Index rows = dest.rows();
+    const Index rows = prod.rows();
    for(Index i=0; i<rows; ++i)
-      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(rhs.transpose())).sum();
+      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
  }
 };

@@ -381,11 +543,9 @@ template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,false>
  *
  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
  */
-#ifndef __CUDACC__
-
 template<typename Derived>
 template<typename OtherDerived>
-inline const Product<Derived, OtherDerived>
+inline const typename ProductReturnType<Derived, OtherDerived>::Type
 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 {
  // A note regarding the function declaration: In MSVC, this function will sometimes
@@ -410,12 +570,9 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 #ifdef EIGEN_DEBUG_PRODUCT
  internal::product_type<Derived,OtherDerived>::debug();
 #endif
-
-  return Product<Derived, OtherDerived>(derived(), other.derived());
+  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
 }

-#endif // __CUDACC__
-
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product will behave like any other expressions: the coefficients of the product will be
@@ -429,7 +586,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
  */
 template<typename Derived>
 template<typename OtherDerived>
-const Product<Derived,OtherDerived,LazyProduct>
+const typename LazyProductReturnType<Derived,OtherDerived>::Type
 MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
 {
  enum {
@@ -448,7 +605,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)

-  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
+  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
 }

 } // end namespace Eigen
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -42,27 +42,21 @@ namespace internal {
 struct default_packet_traits
 {
  enum {
-    HasHalfPacket = 0,
-    
    HasAdd    = 1,
    HasSub    = 1,
    HasMul    = 1,
    HasNegate = 1,
    HasAbs    = 1,
-    HasArg    = 0,
    HasAbs2   = 1,
    HasMin    = 1,
    HasMax    = 1,
    HasConj   = 1,
    HasSetLinear = 1,
-    HasBlend  = 0,

    HasDiv    = 0,
    HasSqrt   = 0,
-    HasRsqrt  = 0,
    HasExp    = 0,
    HasLog    = 0,
-    HasLog10    = 0,
    HasPow    = 0,

    HasSin    = 0,
@@ -70,26 +64,17 @@ struct default_packet_traits
    HasTan    = 0,
    HasASin   = 0,
    HasACos   = 0,
-    HasATan   = 0,
-    HasSinh    = 0,
-    HasCosh    = 0,
-    HasTanh    = 0,
-
-    HasRound  = 0,
-    HasFloor  = 0,
-    HasCeil   = 0
+    HasATan   = 0
  };
 };

 template<typename T> struct packet_traits : default_packet_traits
 {
  typedef T type;
-  typedef T half;
  enum {
    Vectorizable = 0,
    size = 1,
-    AlignedOnScalar = 0,
-    HasHalfPacket = 0
+    AlignedOnScalar = 0
  };
  enum {
    HasAdd    = 0,
@@ -105,256 +90,132 @@ template<typename T> struct packet_traits : default_packet_traits
  };
 };

-template<typename T> struct packet_traits<const T> : packet_traits<T> { };
-
-template <typename Src, typename Tgt> struct type_casting_traits {
-  enum {
-    VectorizedCast = 0,
-    SrcCoeffRatio = 1,
-    TgtCoeffRatio = 1
-  };
-};
-
-
-/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
-template <typename SrcPacket, typename TgtPacket>
-EIGEN_DEVICE_FUNC inline TgtPacket
-pcast(const SrcPacket& a) {
-  return static_cast<TgtPacket>(a);
-}
-template <typename SrcPacket, typename TgtPacket>
-EIGEN_DEVICE_FUNC inline TgtPacket
-pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
-  return static_cast<TgtPacket>(a);
-}
-
-
 /** \internal \returns a + b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 padd(const Packet& a,
        const Packet& b) { return a+b; }

 /** \internal \returns a - b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 psub(const Packet& a,
        const Packet& b) { return a-b; }

 /** \internal \returns -a (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pnegate(const Packet& a) { return -a; }

 /** \internal \returns conj(a) (coeff-wise) */
-
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pconj(const Packet& a) { return numext::conj(a); }
+template<typename Packet> inline Packet
+pconj(const Packet& a) { return conj(a); }

 /** \internal \returns a * b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pmul(const Packet& a,
        const Packet& b) { return a*b; }

 /** \internal \returns a / b (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pdiv(const Packet& a,
        const Packet& b) { return a/b; }

 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pmin(const Packet& a,
-        const Packet& b) { return numext::mini(a, b); }
+        const Packet& b) { using std::min; return (min)(a, b); }

 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pmax(const Packet& a,
-        const Packet& b) { return numext::maxi(a, b); }
+        const Packet& b) { using std::max; return (max)(a, b); }

 /** \internal \returns the absolute value of \a a */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pabs(const Packet& a) { using std::abs; return abs(a); }
-
-/** \internal \returns the phase angle of \a a */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-parg(const Packet& a) { using numext::arg; return arg(a); }
+template<typename Packet> inline Packet
+pabs(const Packet& a) { return abs(a); }

 /** \internal \returns the bitwise and of \a a and \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pand(const Packet& a, const Packet& b) { return a & b; }

 /** \internal \returns the bitwise or of \a a and \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 por(const Packet& a, const Packet& b) { return a | b; }

 /** \internal \returns the bitwise xor of \a a and \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pxor(const Packet& a, const Packet& b) { return a ^ b; }

 /** \internal \returns the bitwise andnot of \a a and \a b */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pandnot(const Packet& a, const Packet& b) { return a & (!b); }

 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }

 /** \internal \returns a packet version of \a *from, (un-aligned load) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }

-/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
-
-/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
-
-/** \internal \returns a packet with elements of \a *from duplicated.
-  * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
-  * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
-  * Currently, this function is only used for scalar * complex products.
-  */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
+template<typename Packet> inline Packet
 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }

-/** \internal \returns a packet with elements of \a *from quadrupled.
-  * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
-  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
-  * Currently, this function is only used in matrix products.
-  * For packet-size smaller or equal to 4, this function is equivalent to pload1 
-  */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-ploadquad(const typename unpacket_traits<Packet>::type* from)
-{ return pload1<Packet>(from); }
-
-/** \internal equivalent to
-  * \code
-  * a0 = pload1(a+0);
-  * a1 = pload1(a+1);
-  * a2 = pload1(a+2);
-  * a3 = pload1(a+3);
-  * \endcode
-  * \sa pset1, pload1, ploaddup, pbroadcast2
-  */
-template<typename Packet> EIGEN_DEVICE_FUNC
-inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
-                        Packet& a0, Packet& a1, Packet& a2, Packet& a3)
-{
-  a0 = pload1<Packet>(a+0);
-  a1 = pload1<Packet>(a+1);
-  a2 = pload1<Packet>(a+2);
-  a3 = pload1<Packet>(a+3);
-}
-
-/** \internal equivalent to
-  * \code
-  * a0 = pload1(a+0);
-  * a1 = pload1(a+1);
-  * \endcode
-  * \sa pset1, pload1, ploaddup, pbroadcast4
-  */
-template<typename Packet> EIGEN_DEVICE_FUNC
-inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
-                        Packet& a0, Packet& a1)
-{
-  a0 = pload1<Packet>(a+0);
-  a1 = pload1<Packet>(a+1);
-}
+/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
+template<typename Packet> inline Packet
+pset1(const typename unpacket_traits<Packet>::type& a) { return a; }

 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
-template<typename Packet> inline Packet
-plset(const typename unpacket_traits<Packet>::type& a) { return a; }
+template<typename Scalar> inline typename packet_traits<Scalar>::type
+plset(const Scalar& a) { return a; }

 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
-template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
+template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
 { (*to) = from; }

 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
-template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
-{  (*to) = from; }
-
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
- { return ploadu<Packet>(from); }
-
- template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
- { pstore(to, from); }
+template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
+{ (*to) = from; }

 /** \internal tries to do cache prefetching of \a addr */
 template<typename Scalar> inline void prefetch(const Scalar* addr)
 {
-#ifdef __CUDA_ARCH__
-#if defined(__LP64__)
-  // 64-bit pointer operand constraint for inlined asm
-  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
-#else
-  // 32-bit pointer operand constraint for inlined asm
-  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
-#endif
-#elif !EIGEN_COMP_MSVC
-  __builtin_prefetch(addr);
+#if !defined(_MSC_VER)
+__builtin_prefetch(addr);
 #endif
 }

 /** \internal \returns the first element of a packet */
-template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
 { return a; }

 /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 preduxp(const Packet* vecs) { return vecs[0]; }

 /** \internal \returns the sum of the elements of \a a*/
-template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
-{ return a; }
-
-/** \internal \returns the sum of the elements of \a a by block of 4 elements.
-  * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
-  * For packet-size smaller or equal to 4, this boils down to a noop.
-  */
-template<typename Packet> EIGEN_DEVICE_FUNC inline
-typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
-predux4(const Packet& a)
+template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
 { return a; }

 /** \internal \returns the product of the elements of \a a*/
-template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
 { return a; }

 /** \internal \returns the min of the elements of \a a*/
-template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
 { return a; }

 /** \internal \returns the max of the elements of \a a*/
-template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
 { return a; }

 /** \internal \returns the reversed elements of \a a*/
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
+template<typename Packet> inline Packet preverse(const Packet& a)
 { return a; }

-template<size_t offset, typename Packet>
-struct protate_impl
-{
-  // Empty so attempts to use this unimplemented path will fail to compile.
-  // Only specializations of this template should be used.
-};
-
-/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
-  * by the given offset, e.g. for offset == 1:
-  *     (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1])
-  */
-template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
-{
-  return offset ? protate_impl<offset, Packet>::run(a) : a;
-}

 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
-{
-  // FIXME: uncomment the following in case we drop the internal imag and real functions.
-//   using std::imag;
-//   using std::real;
-  return Packet(imag(a),real(a));
-}
+template<typename Packet> inline Packet pcplxflip(const Packet& a)
+{ return Packet(imag(a),real(a)); }

 /**************************
 * Special math functions
@@ -362,73 +223,35 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet

 /** \internal \returns the sine of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet psin(const Packet& a) { using std::sin; return sin(a); }
+Packet psin(const Packet& a) { return sin(a); }

 /** \internal \returns the cosine of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pcos(const Packet& a) { using std::cos; return cos(a); }
+Packet pcos(const Packet& a) { return cos(a); }

 /** \internal \returns the tan of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet ptan(const Packet& a) { using std::tan; return tan(a); }
+Packet ptan(const Packet& a) { return tan(a); }

 /** \internal \returns the arc sine of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pasin(const Packet& a) { using std::asin; return asin(a); }
+Packet pasin(const Packet& a) { return asin(a); }

 /** \internal \returns the arc cosine of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pacos(const Packet& a) { using std::acos; return acos(a); }
-
-/** \internal \returns the arc tangent of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet patan(const Packet& a) { using std::atan; return atan(a); }
-
-/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
-
-/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
-
-/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
+Packet pacos(const Packet& a) { return acos(a); }

 /** \internal \returns the exp of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pexp(const Packet& a) { using std::exp; return exp(a); }
+Packet pexp(const Packet& a) { return exp(a); }

 /** \internal \returns the log of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet plog(const Packet& a) { using std::log; return log(a); }
-
-/** \internal \returns the log10 of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet plog10(const Packet& a) { using std::log10; return log10(a); }
+Packet plog(const Packet& a) { return log(a); }

 /** \internal \returns the square-root of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
-
-/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet prsqrt(const Packet& a) {
-  return pdiv(pset1<Packet>(1), psqrt(a));
-}
-
-/** \internal \returns the rounded value of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pround(const Packet& a) { using numext::round; return round(a); }
-
-/** \internal \returns the floor of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
-
-/** \internal \returns the ceil of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
+Packet psqrt(const Packet& a) { return sqrt(a); }

 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
@@ -443,45 +266,34 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
 }

 /** \internal \returns a * b + c (coeff-wise) */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+template<typename Packet> inline Packet
 pmadd(const Packet&  a,
         const Packet&  b,
         const Packet&  c)
 { return padd(pmul(a, b),c); }

 /** \internal \returns a packet version of \a *from.
-  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
-template<typename Packet, int Alignment>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
+  * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
+template<typename Packet, int LoadMode>
+inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
 {
-  if(Alignment >= unpacket_traits<Packet>::alignment)
+  if(LoadMode == Aligned)
    return pload<Packet>(from);
  else
    return ploadu<Packet>(from);
 }

 /** \internal copy the packet \a from to \a *to.
-  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
-template<typename Scalar, typename Packet, int Alignment>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
+  * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
+template<typename Scalar, typename Packet, int LoadMode>
+inline void pstoret(Scalar* to, const Packet& from)
 {
-  if(Alignment >= unpacket_traits<Packet>::alignment)
+  if(LoadMode == Aligned)
    pstore(to, from);
  else
    pstoreu(to, from);
 }

-/** \internal \returns a packet version of \a *from.
-  * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
-  * hardware if available to speedup the loading of data that won't be modified
-  * by the current computation.
-  */
-template<typename Packet, int LoadMode>
-inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
-{
-  return ploadt<Packet, LoadMode>(from);
-}
-
 /** \internal default implementation of palign() allowing partial specialization */
 template<int Offset,typename PacketType>
 struct palign_impl
@@ -490,21 +302,8 @@ struct palign_impl
  static inline void run(PacketType&, const PacketType&) {}
 };

-/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
-  * of \a first and \a Offset first elements of \a second.
-  * 
-  * This function is currently only used to optimize matrix-vector products on unligned matrices.
-  * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
-  * at the position \a Offset. For instance, for packets of 4 elements, we have:
-  *  Input:
-  *  - first = {f0,f1,f2,f3}
-  *  - second = {s0,s1,s2,s3}
-  * Output: 
-  *   - if Offset==0 then {f0,f1,f2,f3}
-  *   - if Offset==1 then {f1,f2,f3,s0}
-  *   - if Offset==2 then {f2,f3,s0,s1}
-  *   - if Offset==3 then {f3,s0,s1,s3}
-  */
+/** \internal update \a first using the concatenation of the \a Offset last elements
+  * of \a first and packet_size minus \a Offset first elements of \a second */
 template<int Offset,typename PacketType>
 inline void palign(PacketType& first, const PacketType& second)
 {
@@ -515,46 +314,15 @@ inline void palign(PacketType& first, const PacketType& second)
 * Fast complex products (GCC generates a function call which is very slow)
 ***************************************************************************/

-// Eigen+CUDA does not support complexes.
-#ifndef __CUDACC__
-
 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }

 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }

-#endif
-
-
-/***************************************************************************
- * PacketBlock, that is a collection of N packets where the number of words
- * in the packet is a multiple of N.
-***************************************************************************/
-template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
-  Packet packet[N];
-};
-
-template<typename Packet> EIGEN_DEVICE_FUNC inline void
-ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
-  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
-}
-
-/***************************************************************************
- * Selector, i.e. vector of N boolean values used to select (i.e. blend)
- * words from 2 packets.
-***************************************************************************/
-template <size_t N> struct Selector {
-  bool select[N];
-};
-
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
-  return ifPacket.select[0] ? thenPacket : elsePacket;
-}
-
 } // end namespace internal

 } // end namespace Eigen

 #endif // EIGEN_GENERIC_PACKET_MATH_H
+
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2010-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -11,11 +11,11 @@
 #ifndef EIGEN_GLOBAL_FUNCTIONS_H
 #define EIGEN_GLOBAL_FUNCTIONS_H

-#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \
+#define EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(NAME,FUNCTOR) \
  template<typename Derived> \
  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
-  (NAME)(const Eigen::ArrayBase<Derived>& x) { \
-    return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
+  NAME(const Eigen::ArrayBase<Derived>& x) { \
+    return x.derived(); \
  }

 #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
@@ -30,93 +30,50 @@
  { \
    static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \
    { \
-      return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \
+      return x.derived(); \
    } \
  };

-namespace Eigen
+
+namespace std
 {
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op)
-  
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(real,scalar_real_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(imag,scalar_imag_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sin,scalar_sin_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(cos,scalar_cos_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(asin,scalar_asin_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(acos,scalar_acos_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(tan,scalar_tan_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(exp,scalar_exp_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(log,scalar_log_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(abs,scalar_abs_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_STD_UNARY(sqrt,scalar_sqrt_op)
+
  template<typename Derived>
  inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
  pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
    return x.derived().pow(exponent);
  }

-  /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
-    *
-    * This function computes the coefficient-wise power.
-    *
-    * Example: \include Cwise_array_power_array.cpp
-    * Output: \verbinclude Cwise_array_power_array.out
-    * 
-    * \sa ArrayBase::pow()
-    */
-  template<typename Derived,typename ExponentDerived>
-  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
-  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) 
+  template<typename Derived>
+  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>
+  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<Derived>& exponents) 
  {
-    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
+    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>(
      x.derived(),
      exponents.derived()
    );
  }
-  
-  /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
-    *
-    * This function computes the coefficient-wise power between a scalar and an array of exponents.
-    * Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same.
-    *
-    * Example: \include Cwise_scalar_power_array.cpp
-    * Output: \verbinclude Cwise_scalar_power_array.out
-    * 
-    * \sa ArrayBase::pow()
-    */
-  template<typename Derived>
-  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>
-  pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents) 
-  {
-    typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x);
-    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>(
-      constant_x,
-      exponents.derived()
-    );
-  }
-  
+}
+
+namespace Eigen
+{
  /**
  * \brief Component-wise division of a scalar by array elements.
  **/
  template <typename Derived>
  inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
-    operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
+    operator/(typename Derived::Scalar s, const Eigen::ArrayBase<Derived>& a)
  {
    return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
      a.derived(),
@@ -128,10 +85,19 @@ namespace Eigen
  {
    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(sin,scalar_sin_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(cos,scalar_cos_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(asin,scalar_asin_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(acos,scalar_acos_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(tan,scalar_tan_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(exp,scalar_exp_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(log,scalar_log_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs,scalar_abs_op)
    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(sqrt,scalar_sqrt_op)
  }
 }

-// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...)
+// TODO: cleanly disable those functions that are not supported on Array (internal::real_ref, internal::random, internal::isApprox...)

 #endif // EIGEN_GLOBAL_FUNCTIONS_H
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -49,18 +49,15 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
  */
 struct IOFormat
 {
-  /** Default constructor, see class IOFormat for the meaning of the parameters */
+  /** Default contructor, see class IOFormat for the meaning of the parameters */
  IOFormat(int _precision = StreamPrecision, int _flags = 0,
    const std::string& _coeffSeparator = " ",
    const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
    const std::string& _matPrefix="", const std::string& _matSuffix="")
  : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
-    rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
+    coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
  {
-    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
-    // don't add rowSpacer if columns are not to be aligned
-    if((flags & DontAlignCols))
-      return;
+    rowSpacer = "";
    int i = int(matSuffix.length())-1;
    while (i>=0 && matSuffix[i]!='\n')
    {
@@ -132,7 +129,6 @@ struct significant_decimals_default_impl
  static inline int run()
  {
    using std::ceil;
-    using std::log;
    return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
  }
 };
@@ -164,6 +160,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
  
  typename Derived::Nested m = _m;
  typedef typename Derived::Scalar Scalar;
+  typedef typename Derived::Index Index;

  Index width = 0;

@@ -188,22 +185,21 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
    explicit_precision = fmt.precision;
  }

-  std::streamsize old_precision = 0;
-  if(explicit_precision) old_precision = s.precision(explicit_precision);
-
  bool align_cols = !(fmt.flags & DontAlignCols);
  if(align_cols)
  {
    // compute the largest width
-    for(Index j = 0; j < m.cols(); ++j)
+    for(Index j = 1; j < m.cols(); ++j)
      for(Index i = 0; i < m.rows(); ++i)
      {
        std::stringstream sstr;
-        sstr.copyfmt(s);
+        if(explicit_precision) sstr.precision(explicit_precision);
        sstr << m.coeff(i,j);
        width = std::max<Index>(width, Index(sstr.str().length()));
      }
  }
+  std::streamsize old_precision = 0;
+  if(explicit_precision) old_precision = s.precision(explicit_precision);
  s << fmt.matPrefix;
  for(Index i = 0; i < m.rows(); ++i)
  {
--- a/Eigen/src/Core/Inverse.h
+++ b/Eigen/src/Core/Inverse.h
@@ -1,126 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_INVERSE_H
-#define EIGEN_INVERSE_H
-
-namespace Eigen { 
-
-// TODO move the general declaration in Core, and rename this file DenseInverseImpl.h, or something like this...
-
-template<typename XprType,typename StorageKind> class InverseImpl;
-
-namespace internal {
-
-template<typename XprType>
-struct traits<Inverse<XprType> >
-  : traits<typename XprType::PlainObject>
-{
-  typedef typename XprType::PlainObject PlainObject;
-  typedef traits<PlainObject> BaseTraits;
-  enum {
-    Flags = BaseTraits::Flags & RowMajorBit
-  };
-};
-
-} // end namespace internal
-
-/** \class Inverse
-  *
-  * \brief Expression of the inverse of another expression
-  *
-  * \tparam XprType the type of the expression we are taking the inverse
-  *
-  * This class represents an abstract expression of A.inverse()
-  * and most of the time this is the only way it is used.
-  *
-  */
-template<typename XprType>
-class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>
-{
-public:
-  typedef typename XprType::StorageIndex StorageIndex;
-  typedef typename XprType::PlainObject                       PlainObject;
-  typedef typename internal::ref_selector<XprType>::type      XprTypeNested;
-  typedef typename internal::remove_all<XprTypeNested>::type  XprTypeNestedCleaned;
-  
-  explicit Inverse(const XprType &xpr)
-    : m_xpr(xpr)
-  {}
-  
-  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
-  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
-
-  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
-
-protected:
-  XprTypeNested m_xpr;
-};
-
-/** \internal
-  * Specialization of the Inverse expression for dense expressions.
-  * Direct access to the coefficients are discared.
-  * FIXME this intermediate class is probably not needed anymore.
-  */
-template<typename XprType>
-class InverseImpl<XprType,Dense>
-  : public MatrixBase<Inverse<XprType> >
-{
-  typedef Inverse<XprType> Derived;
-  
-public:
-  
-  typedef MatrixBase<Derived> Base;
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-  typedef typename internal::remove_all<XprType>::type NestedExpression;
-
-private:
-  
-  Scalar coeff(Index row, Index col) const;
-  Scalar coeff(Index i) const;
-};
-
-namespace internal {
-
-/** \internal
-  * \brief Default evaluator for Inverse expression.
-  * 
-  * This default evaluator for Inverse expression simply evaluate the inverse into a temporary
-  * by a call to internal::call_assignment_no_alias.
-  * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
-  * there own nested expression.
-  *
-  * \sa class Inverse
-  */
-template<typename ArgType>
-struct unary_evaluator<Inverse<ArgType> >
-  : public evaluator<typename Inverse<ArgType>::PlainObject>
-{
-  typedef Inverse<ArgType> InverseType;
-  typedef typename InverseType::PlainObject PlainObject;
-  typedef evaluator<PlainObject> Base;
-  
-  enum { Flags = Base::Flags | EvalBeforeNestingBit };
-
-  unary_evaluator(const InverseType& inv_xpr)
-    : m_result(inv_xpr.rows(), inv_xpr.cols())
-  {
-    ::new (static_cast<Base*>(this)) Base(m_result);
-    internal::call_assignment_no_alias(m_result, inv_xpr);
-  }
-  
-protected:
-  PlainObject m_result;
-};
-  
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_INVERSE_H
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -19,7 +19,7 @@ namespace Eigen {
  * \brief A matrix or vector expression mapping an existing array of data.
  *
  * \tparam PlainObjectType the equivalent matrix type of the mapped data
-  * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
+  * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
  *                The default is \c #Unaligned.
  * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
  *                   of an ordinary, contiguous array. This can be overridden by specifying strides.
@@ -70,6 +70,8 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
  : public traits<PlainObjectType>
 {
  typedef traits<PlainObjectType> TraitsBase;
+  typedef typename PlainObjectType::Index Index;
+  typedef typename PlainObjectType::Scalar Scalar;
  enum {
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
@@ -77,9 +79,22 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
                             : int(StrideType::OuterStrideAtCompileTime),
-    Alignment = int(MapOptions)&int(AlignedMask),
+    HasNoInnerStride = InnerStrideAtCompileTime == 1,
+    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
+    HasNoStride = HasNoInnerStride && HasNoOuterStride,
+    IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+    KeepsPacketAccess = bool(HasNoInnerStride)
+                        && ( bool(IsDynamicSize)
+                           || HasNoOuterStride
+                           || ( OuterStrideAtCompileTime!=Dynamic
+                           && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
-    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+    Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
+    Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
+           ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
+    Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
+    Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
  };
 private:
  enum { Options }; // Expressions don't have Options
@@ -95,17 +110,19 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    EIGEN_DENSE_PUBLIC_INTERFACE(Map)

    typedef typename Base::PointerType PointerType;
+#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API
+    typedef const Scalar* PointerArgType;
+    inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
+#else
    typedef PointerType PointerArgType;
-    EIGEN_DEVICE_FUNC
    inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
+#endif

-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
    }

-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
@@ -116,39 +133,36 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma

    /** Constructor in the fixed-size case.
      *
-      * \param dataPtr pointer to the array to map
+      * \param data pointer to the array to map
      * \param stride optional Stride object, passing the strides.
      */
-    EIGEN_DEVICE_FUNC
-    explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
-      : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
+    inline Map(PointerArgType data, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(data)), m_stride(stride)
    {
      PlainObjectType::Base::_check_template_params();
    }

    /** Constructor in the dynamic-size vector case.
      *
-      * \param dataPtr pointer to the array to map
+      * \param data pointer to the array to map
      * \param size the size of the vector expression
      * \param stride optional Stride object, passing the strides.
      */
-    EIGEN_DEVICE_FUNC
-    inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
-      : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
+    inline Map(PointerArgType data, Index size, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(data), size), m_stride(stride)
    {
      PlainObjectType::Base::_check_template_params();
    }

    /** Constructor in the dynamic-size matrix case.
      *
-      * \param dataPtr pointer to the array to map
+      * \param data pointer to the array to map
      * \param rows the number of rows of the matrix expression
      * \param cols the number of columns of the matrix expression
      * \param stride optional Stride object, passing the strides.
      */
-    EIGEN_DEVICE_FUNC
-    inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
-      : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
+    inline Map(PointerArgType data, Index rows, Index cols, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(data), rows, cols), m_stride(stride)
    {
      PlainObjectType::Base::_check_template_params();
    }
@@ -159,6 +173,19 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
    StrideType m_stride;
 };

+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
+  ::Array(const Scalar *data)
+{
+  this->_set_noalias(Eigen::Map<const Array>(data));
+}
+
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>
+  ::Matrix(const Scalar *data)
+{
+  this->_set_noalias(Eigen::Map<const Matrix>(data));
+}

 } // end namespace Eigen

--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -12,7 +12,7 @@
 #define EIGEN_MAPBASE_H

 #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
-      EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+      EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
                          YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)

 namespace Eigen { 
@@ -37,6 +37,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
    };

    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -75,8 +76,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>

    typedef typename Base::CoeffReturnType CoeffReturnType;

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
+    inline Index rows() const { return m_rows.value(); }
+    inline Index cols() const { return m_cols.value(); }

    /** Returns a pointer to the first coefficient of the matrix or vector.
      *
@@ -84,28 +85,24 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      *
      * \sa innerStride(), outerStride()
      */
-    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
+    inline const Scalar* data() const { return m_data; }

-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeff(Index rowId, Index colId) const
+    inline const Scalar& coeff(Index row, Index col) const
    {
-      return m_data[colId * colStride() + rowId * rowStride()];
+      return m_data[col * colStride() + row * rowStride()];
    }

-    EIGEN_DEVICE_FUNC
    inline const Scalar& coeff(Index index) const
    {
      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
      return m_data[index * innerStride()];
    }

-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    inline const Scalar& coeffRef(Index row, Index col) const
    {
-      return this->m_data[colId * colStride() + rowId * rowStride()];
+      return this->m_data[col * colStride() + row * rowStride()];
    }

-    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
@@ -113,10 +110,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
    }

    template<int LoadMode>
-    inline PacketScalar packet(Index rowId, Index colId) const
+    inline PacketScalar packet(Index row, Index col) const
    {
      return internal::ploadt<PacketScalar, LoadMode>
-               (m_data + (colId * colStride() + rowId * rowStride()));
+               (m_data + (col * colStride() + row * rowStride()));
    }

    template<int LoadMode>
@@ -126,30 +123,27 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
      return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
    }

-    EIGEN_DEVICE_FUNC
-    explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+    inline MapBase(PointerType data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
    {
      EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
      checkSanity();
    }

-    EIGEN_DEVICE_FUNC
-    inline MapBase(PointerType dataPtr, Index vecSize)
-            : m_data(dataPtr),
-              m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
-              m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime))
+    inline MapBase(PointerType data, Index size)
+            : m_data(data),
+              m_rows(RowsAtCompileTime == Dynamic ? size : Index(RowsAtCompileTime)),
+              m_cols(ColsAtCompileTime == Dynamic ? size : Index(ColsAtCompileTime))
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-      eigen_assert(vecSize >= 0);
-      eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
+      eigen_assert(size >= 0);
+      eigen_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
      checkSanity();
    }

-    EIGEN_DEVICE_FUNC
-    inline MapBase(PointerType dataPtr, Index rows, Index cols)
-            : m_data(dataPtr), m_rows(rows), m_cols(cols)
+    inline MapBase(PointerType data, Index rows, Index cols)
+            : m_data(data), m_rows(rows), m_cols(cols)
    {
-      eigen_assert( (dataPtr == 0)
+      eigen_assert( (data == 0)
              || (   rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
                  && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
      checkSanity();
@@ -157,12 +151,13 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>

  protected:

-    EIGEN_DEVICE_FUNC
    void checkSanity() const
    {
-#if EIGEN_MAX_ALIGN_BYTES>0
-      eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned");
-#endif
+      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
+                                        internal::inner_stride_at_compile_time<Derived>::ret==1),
+                          PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
+                   && "data is not aligned");
    }

    PointerType m_data;
@@ -173,14 +168,13 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
 template<typename Derived> class MapBase<Derived, WriteAccessors>
  : public MapBase<Derived, ReadOnlyAccessors>
 {
-    typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
  public:

    typedef MapBase<Derived, ReadOnlyAccessors> Base;

    typedef typename Base::Scalar Scalar;
    typedef typename Base::PacketScalar PacketScalar;
-    typedef typename Base::StorageIndex StorageIndex;
+    typedef typename Base::Index Index;
    typedef typename Base::PointerType PointerType;

    using Base::derived;
@@ -201,18 +195,14 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
                    const Scalar
                  >::type ScalarWithConstIfNotLvalue;

-    EIGEN_DEVICE_FUNC
    inline const Scalar* data() const { return this->m_data; }
-    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error

-    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
    {
      return this->m_data[col * colStride() + row * rowStride()];
    }

-    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
    {
      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
@@ -220,38 +210,33 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
    }

    template<int StoreMode>
-    inline void writePacket(Index row, Index col, const PacketScalar& val)
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
    {
      internal::pstoret<Scalar, PacketScalar, StoreMode>
-               (this->m_data + (col * colStride() + row * rowStride()), val);
+               (this->m_data + (col * colStride() + row * rowStride()), x);
    }

    template<int StoreMode>
-    inline void writePacket(Index index, const PacketScalar& val)
+    inline void writePacket(Index index, const PacketScalar& x)
    {
      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
      internal::pstoret<Scalar, PacketScalar, StoreMode>
-                (this->m_data + index * innerStride(), val);
+                (this->m_data + index * innerStride(), x);
    }

-    EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
-    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
-    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
+    explicit inline MapBase(PointerType data) : Base(data) {}
+    inline MapBase(PointerType data, Index size) : Base(data, size) {}
+    inline MapBase(PointerType data, Index rows, Index cols) : Base(data, rows, cols) {}

-    EIGEN_DEVICE_FUNC
    Derived& operator=(const MapBase& other)
    {
-      ReadOnlyMapBase::Base::operator=(other);
+      Base::Base::operator=(other);
      return derived();
    }

-    // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
-    // see bugs 821 and 920.
-    using ReadOnlyMapBase::Base::operator=;
+    using Base::Base::operator=;
 };

-#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
-
 } // end namespace Eigen

 #endif // EIGEN_MAPBASE_H
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -24,13 +24,13 @@ namespace Eigen {
  * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
  *
  * The first three template parameters are required:
-  * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.
-  *                 User defined scalar types are supported as well (see \ref user_defined_scalars "here").
+  * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
+  *                 User defined sclar types are supported as well (see \ref user_defined_scalars "here").
  * \tparam _Rows Number of rows, or \b Dynamic
  * \tparam _Cols Number of columns, or \b Dynamic
  *
  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
-  * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either
+  * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
  *                 \b #AutoAlign or \b #DontAlign.
  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
  *                 for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
@@ -97,40 +97,6 @@ namespace Eigen {
  * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
  * </dl>
  *
-  * <i><b>ABI and storage layout</b></i>
-  * 
-  * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
-  * <table  class="manual">
-  * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
-  * <tr><td>\code Matrix<T,Dynamic,Dynamic> \endcode</td><td>\code
-  * struct {
-  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
-  *   Eigen::Index rows, cols;
-  *  };
-  * \endcode</td></tr>
-  * <tr class="alt"><td>\code
-  * Matrix<T,Dynamic,1>
-  * Matrix<T,1,Dynamic> \endcode</td><td>\code
-  * struct {
-  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
-  *   Eigen::Index size;
-  *  };
-  * \endcode</td></tr>
-  * <tr><td>\code Matrix<T,Rows,Cols> \endcode</td><td>\code
-  * struct {
-  *   T data[Rows*Cols];        // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0
-  *  };
-  * \endcode</td></tr>
-  * <tr class="alt"><td>\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \endcode</td><td>\code
-  * struct {
-  *   T data[MaxRows*MaxCols];  // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0
-  *   Eigen::Index rows, cols;
-  *  };
-  * \endcode</td></tr>
-  * </table>
-  * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
-  * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
-  * 
  * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, 
  * \ref TopicStorageOrders 
  */
@@ -139,23 +105,9 @@ namespace internal {
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
 {
-private:
-  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
-  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
-  enum {
-      row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
-      is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
-      max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
-      default_alignment = compute_default_alignment<_Scalar,max_size>::value,
-      actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
-      required_alignment = unpacket_traits<PacketScalar>::alignment,
-      packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
-    };
-    
-public:
  typedef _Scalar Scalar;
  typedef Dense StorageKind;
-  typedef Eigen::Index StorageIndex;
+  typedef DenseIndex Index;
  typedef MatrixXpr XprKind;
  enum {
    RowsAtCompileTime = _Rows,
@@ -163,13 +115,10 @@ public:
    MaxRowsAtCompileTime = _MaxRows,
    MaxColsAtCompileTime = _MaxCols,
    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+    CoeffReadCost = NumTraits<Scalar>::ReadCost,
    Options = _Options,
    InnerStrideAtCompileTime = 1,
-    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
-    
-    // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
-    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
-    Alignment = actual_alignment
+    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
  };
 };
 }
@@ -202,7 +151,6 @@ class Matrix
      *
      * \callgraph
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
    {
      return Base::_set(other);
@@ -219,8 +167,7 @@ class Matrix
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
+    EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
    {
      return Base::_set(other);
    }
@@ -232,14 +179,12 @@ class Matrix
      * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
    {
      return Base::operator=(other);
    }

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
    {
      return Base::operator=(func);
@@ -255,95 +200,52 @@ class Matrix
      *
      * \sa resize(Index,Index)
      */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Matrix() : Base()
+    EIGEN_STRONG_INLINE explicit Matrix() : Base()
    {
      Base::_check_template_params();
-      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }

    // FIXME is it still needed
-    EIGEN_DEVICE_FUNC
-    explicit Matrix(internal::constructor_without_unaligned_array_assert)
+    Matrix(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
-    { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+    { Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED }

-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    EIGEN_DEVICE_FUNC
-    Matrix(Matrix&& other)
-      : Base(std::move(other))
+    /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
+      *
+      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
+      * it is redundant to pass the dimension here, so it makes more sense to use the default
+      * constructor Matrix() instead.
+      */
+    EIGEN_STRONG_INLINE explicit Matrix(Index dim)
+      : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
    {
      Base::_check_template_params();
-      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-        Base::_set_noalias(other);
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
+      eigen_assert(dim >= 0);
+      eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
+      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }
-    EIGEN_DEVICE_FUNC
-    Matrix& operator=(Matrix&& other)
-    {
-      other.swap(*this);
-      return *this;
-    }
-#endif

    #ifndef EIGEN_PARSED_BY_DOXYGEN
-
-    // This constructor is for both 1x1 matrices and dynamic vectors
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE explicit Matrix(const T& x)
-    {
-      Base::_check_template_params();
-      Base::template _init1<T>(x);
-    }
-
    template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
    {
      Base::_check_template_params();
      Base::template _init2<T0,T1>(x, y);
    }
    #else
-    /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
-    EIGEN_DEVICE_FUNC
-    explicit Matrix(const Scalar *data);
-
-    /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
-      *
-      * This is useful for dynamic-size vectors. For fixed-size vectors,
-      * it is redundant to pass these parameters, so one should use the default constructor
-      * Matrix() instead.
-      * 
-      * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
-      * calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
-      * For fixed-size \c 1x1 matrices it is therefore recommended to use the default
-      * constructor Matrix() instead, especially when using one of the non standard
-      * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
-      */
-    EIGEN_STRONG_INLINE explicit Matrix(Index dim);
-    /** \brief Constructs an initialized 1x1 matrix with the given coefficient */
-    Matrix(const Scalar& x);
    /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
      *
      * This is useful for dynamic-size matrices. For fixed-size matrices,
      * it is redundant to pass these parameters, so one should use the default constructor
-      * Matrix() instead.
-      * 
-      * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
-      * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
-      * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
-      * constructor Matrix() instead, especially when using one of the non standard
-      * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
-      */
-    EIGEN_DEVICE_FUNC
+      * Matrix() instead. */
    Matrix(Index rows, Index cols);
-    
    /** \brief Constructs an initialized 2D vector with given coefficients */
    Matrix(const Scalar& x, const Scalar& y);
    #endif

    /** \brief Constructs an initialized 3D vector with given coefficients */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
    {
      Base::_check_template_params();
@@ -353,7 +255,6 @@ class Matrix
      m_storage.data()[2] = z;
    }
    /** \brief Constructs an initialized 4D vector with given coefficients */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
    {
      Base::_check_template_params();
@@ -364,33 +265,76 @@ class Matrix
      m_storage.data()[3] = w;
    }

+    explicit Matrix(const Scalar *data);

+    /** \brief Constructor copying the value of the expression \a other */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
+             : Base(other.rows() * other.cols(), other.rows(), other.cols())
+    {
+      // This test resides here, to bring the error messages closer to the user. Normally, these checks
+      // are performed deeply within the library, thus causing long and scary error traces.
+      EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+      Base::_check_template_params();
+      Base::_set_noalias(other);
+    }
    /** \brief Copy constructor */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
-    { }
+    EIGEN_STRONG_INLINE Matrix(const Matrix& other)
+            : Base(other.rows() * other.cols(), other.rows(), other.cols())
+    {
+      Base::_check_template_params();
+      Base::_set_noalias(other);
+    }
+    /** \brief Copy constructor with in-place evaluation */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
+    {
+      Base::_check_template_params();
+      Base::resize(other.rows(), other.cols());
+      other.evalTo(*this);
+    }

    /** \brief Copy constructor for generic expressions.
      * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
-      : Base(other.derived())
-    { }
+      : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
+    {
+      Base::_check_template_params();
+      Base::resize(other.rows(), other.cols());
+      // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to
+      //              go for pure _set() implementations, right?
+      *this = other;
+    }

-    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
-    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+    /** \internal
+      * \brief Override MatrixBase::swap() since for dynamic-sized matrices
+      * of same type it is enough to swap the data pointers.
+      */
+    template<typename OtherDerived>
+    void swap(MatrixBase<OtherDerived> const & other)
+    { this->_swap(other.derived()); }
+
+    inline Index innerStride() const { return 1; }
+    inline Index outerStride() const { return this->innerSize(); }

    /////////// Geometry module ///////////

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);

+    #ifdef EIGEN2_SUPPORT
+    template<typename OtherDerived>
+    explicit Matrix(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
+    template<typename OtherDerived>
+    Matrix& operator=(const eigen2_RotationBase<OtherDerived,ColsAtCompileTime>& r);
+    #endif
+
    // allow to extend Matrix outside Eigen
    #ifdef EIGEN_MATRIX_PLUGIN
    #include EIGEN_MATRIX_PLUGIN
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -52,7 +52,7 @@ template<typename Derived> class MatrixBase
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef MatrixBase StorageBaseType;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -66,7 +66,8 @@ template<typename Derived> class MatrixBase
    using Base::MaxSizeAtCompileTime;
    using Base::IsVectorAtCompileTime;
    using Base::Flags;
-    
+    using Base::CoeffReadCost;
+
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@@ -80,8 +81,6 @@ template<typename Derived> class MatrixBase
    using Base::operator-=;
    using Base::operator*=;
    using Base::operator/=;
-    using Base::operator*;
-    using Base::operator/;

    typedef typename Base::CoeffReturnType CoeffReturnType;
    typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
@@ -99,14 +98,25 @@ template<typename Derived> class MatrixBase

    /** \returns the size of the main diagonal, which is min(rows(),cols()).
      * \sa rows(), cols(), SizeAtCompileTime. */
-    EIGEN_DEVICE_FUNC
    inline Index diagonalSize() const { return (std::min)(rows(),cols()); }

-    typedef typename Base::PlainObject PlainObject;
+    /** \brief The plain matrix type corresponding to this expression.
+      *
+      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
+      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
+      * that the return type of eval() is either PlainObject or const PlainObject&.
+      */
+    typedef Matrix<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainObject;

 #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
    /** \internal the return type of MatrixBase::adjoint() */
    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
@@ -115,7 +125,7 @@ template<typename Derived> class MatrixBase
    /** \internal Return type of eigenvalues() */
    typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
    /** \internal the return type of identity */
-    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
+    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
    /** \internal the return type of unit vectors */
    typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
                  internal::traits<Derived>::RowsAtCompileTime,
@@ -135,48 +145,36 @@ template<typename Derived> class MatrixBase
    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const MatrixBase& other);

    // We cannot inherit here via Base::operator= since it is causing
    // trouble with MSVC.

    template <typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const DenseBase<OtherDerived>& other);

    template <typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const EigenBase<OtherDerived>& other);

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator=(const ReturnByValue<OtherDerived>& other);

+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename ProductDerived, typename Lhs, typename Rhs>
+    Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator+=(const MatrixBase<OtherDerived>& other);
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    Derived& operator-=(const MatrixBase<OtherDerived>& other);

-#ifdef __CUDACC__
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    const Product<Derived,OtherDerived,LazyProduct>
-    operator*(const MatrixBase<OtherDerived> &other) const
-    { return this->lazyProduct(other); }
-#else
-
-    template<typename OtherDerived>
-    const Product<Derived,OtherDerived>
+    const typename ProductReturnType<Derived,OtherDerived>::Type
    operator*(const MatrixBase<OtherDerived> &other) const;

-#endif
-
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
-    const Product<Derived,OtherDerived,LazyProduct>
+    const typename LazyProductReturnType<Derived,OtherDerived>::Type
    lazyProduct(const MatrixBase<OtherDerived> &other) const;

    template<typename OtherDerived>
@@ -189,103 +187,100 @@ template<typename Derived> class MatrixBase
    void applyOnTheRight(const EigenBase<OtherDerived>& other);

    template<typename DiagonalDerived>
-    EIGEN_DEVICE_FUNC
-    const Product<Derived, DiagonalDerived, LazyProduct>
+    const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
    operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
    dot(const MatrixBase<OtherDerived>& other) const;

-    EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
-    EIGEN_DEVICE_FUNC RealScalar norm() const;
+    #ifdef EIGEN2_SUPPORT
+      template<typename OtherDerived>
+      Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
+    #endif
+
+    RealScalar squaredNorm() const;
+    RealScalar norm() const;
    RealScalar stableNorm() const;
    RealScalar blueNorm() const;
    RealScalar hypotNorm() const;
-    EIGEN_DEVICE_FUNC const PlainObject normalized() const;
-    EIGEN_DEVICE_FUNC void normalize();
+    const PlainObject normalized() const;
+    void normalize();

-    EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
-    EIGEN_DEVICE_FUNC void adjointInPlace();
+    const AdjointReturnType adjoint() const;
+    void adjointInPlace();

    typedef Diagonal<Derived> DiagonalReturnType;
-    EIGEN_DEVICE_FUNC
    DiagonalReturnType diagonal();
-    
-    typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
-    EIGEN_DEVICE_FUNC
-    ConstDiagonalReturnType diagonal() const;
+    typedef const Diagonal<const Derived> ConstDiagonalReturnType;
+    const ConstDiagonalReturnType diagonal() const;

    template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
    template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };

-    template<int Index> 
-    EIGEN_DEVICE_FUNC
-    typename DiagonalIndexReturnType<Index>::Type diagonal();
+    template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
+    template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;

-    template<int Index>
-    EIGEN_DEVICE_FUNC
-    typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+    // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
+    // On the other hand they confuse MSVC8...
+    #if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later
+    typename MatrixBase::template DiagonalIndexReturnType<Dynamic>::Type diagonal(Index index);
+    typename MatrixBase::template ConstDiagonalIndexReturnType<Dynamic>::Type diagonal(Index index) const;
+    #else
+    typename DiagonalIndexReturnType<Dynamic>::Type diagonal(Index index);
+    typename ConstDiagonalIndexReturnType<Dynamic>::Type diagonal(Index index) const;
+    #endif
+
+    #ifdef EIGEN2_SUPPORT
+    template<unsigned int Mode> typename internal::eigen2_part_return_type<Derived, Mode>::type part();
+    template<unsigned int Mode> const typename internal::eigen2_part_return_type<Derived, Mode>::type part() const;
    
-    typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
-    typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
-
-    EIGEN_DEVICE_FUNC
-    DiagonalDynamicIndexReturnType diagonal(Index index);
-    EIGEN_DEVICE_FUNC
-    ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
+    // huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead
+    // of an integer constant. Solution: overload the part() method template wrt template parameters list.
+    template<template<typename T, int N> class U>
+    const DiagonalWrapper<ConstDiagonalReturnType> part() const
+    { return diagonal().asDiagonal(); }
+    #endif // EIGEN2_SUPPORT

    template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
    template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };

-    template<unsigned int Mode>
-    EIGEN_DEVICE_FUNC
-    typename TriangularViewReturnType<Mode>::Type triangularView();
-    template<unsigned int Mode>
-    EIGEN_DEVICE_FUNC
-    typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+    template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
+    template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;

    template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
    template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };

-    template<unsigned int UpLo> 
-    EIGEN_DEVICE_FUNC
-    typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
-    template<unsigned int UpLo>
-    EIGEN_DEVICE_FUNC
-    typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+    template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+    template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;

    const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
-                                         const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
-    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
-    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
-    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
-    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
-    EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
-    EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
-    EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
-    EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
+                                         typename NumTraits<Scalar>::Real m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
+    static const IdentityReturnType Identity();
+    static const IdentityReturnType Identity(Index rows, Index cols);
+    static const BasisReturnType Unit(Index size, Index i);
+    static const BasisReturnType Unit(Index i);
+    static const BasisReturnType UnitX();
+    static const BasisReturnType UnitY();
+    static const BasisReturnType UnitZ();
+    static const BasisReturnType UnitW();

-    EIGEN_DEVICE_FUNC
    const DiagonalWrapper<const Derived> asDiagonal() const;
    const PermutationWrapper<const Derived> asPermutation() const;

-    EIGEN_DEVICE_FUNC
    Derived& setIdentity();
-    EIGEN_DEVICE_FUNC
    Derived& setIdentity(Index rows, Index cols);

-    bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isIdentity(RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isDiagonal(RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;

-    bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isUpperTriangular(RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isLowerTriangular(RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;

    template<typename OtherDerived>
    bool isOrthogonal(const MatrixBase<OtherDerived>& other,
-                      const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+                      RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isUnitary(RealScalar prec = NumTraits<Scalar>::dummy_precision()) const;

    /** \returns true if each coefficients of \c *this and \a other are all exactly equal.
      * \warning When using floating point scalar values you probably should rather use a
@@ -305,37 +300,50 @@ template<typename Derived> class MatrixBase

    NoAlias<Derived,Eigen::MatrixBase > noalias();

-    // TODO forceAlignedAccess is temporarily disabled
-    // Need to find a nicer workaround.
-    inline const Derived& forceAlignedAccess() const { return derived(); }
-    inline Derived& forceAlignedAccess() { return derived(); }
-    template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
-    template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
+    inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+    inline ForceAlignedAccess<Derived> forceAlignedAccess();
+    template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const;
+    template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();

-    EIGEN_DEVICE_FUNC Scalar trace() const;
+    Scalar trace() const;

-    template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
+/////////// Array module ///////////

-    EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
-    EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
+    template<int p> RealScalar lpNorm() const;

-    /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
+    MatrixBase<Derived>& matrix() { return *this; }
+    const MatrixBase<Derived>& matrix() const { return *this; }
+
+    /** \returns an \link ArrayBase Array \endlink expression of this matrix
      * \sa ArrayBase::matrix() */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
-    /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
-      * \sa ArrayBase::matrix() */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
+    ArrayWrapper<Derived> array() { return derived(); }
+    const ArrayWrapper<const Derived> array() const { return derived(); }

 /////////// LU module ///////////

-    EIGEN_DEVICE_FUNC const FullPivLU<PlainObject> fullPivLu() const;
-    EIGEN_DEVICE_FUNC const PartialPivLU<PlainObject> partialPivLu() const;
+    const FullPivLU<PlainObject> fullPivLu() const;
+    const PartialPivLU<PlainObject> partialPivLu() const;

+    #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
+    const LU<PlainObject> lu() const;
+    #endif
+
+    #ifdef EIGEN2_SUPPORT
+    const LU<PlainObject> eigen2_lu() const;
+    #endif
+
+    #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
    const PartialPivLU<PlainObject> lu() const;
-
-    EIGEN_DEVICE_FUNC
-    const Inverse<Derived> inverse() const;
+    #endif
    
+    #ifdef EIGEN2_SUPPORT
+    template<typename ResultType>
+    void computeInverse(MatrixBase<ResultType> *result) const {
+      *result = this->inverse();
+    }
+    #endif
+
+    const internal::inverse_impl<Derived> inverse() const;
    template<typename ResultType>
    void computeInverseAndDetWithCheck(
      ResultType& inverse,
@@ -361,6 +369,10 @@ template<typename Derived> class MatrixBase
    const HouseholderQR<PlainObject> householderQr() const;
    const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
    const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+    
+    #ifdef EIGEN2_SUPPORT
+    const QR<PlainObject> qr() const;
+    #endif

    EigenvaluesReturnType eigenvalues() const;
    RealScalar operatorNorm() const;
@@ -368,7 +380,10 @@ template<typename Derived> class MatrixBase
 /////////// SVD module ///////////

    JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
-    BDCSVD<PlainObject>    bdcSvd(unsigned int computationOptions = 0) const;
+
+    #ifdef EIGEN2_SUPPORT
+    SVD<PlainObject> svd() const;
+    #endif

 /////////// Geometry module ///////////

@@ -380,25 +395,20 @@ template<typename Derived> class MatrixBase
    };
    #endif // EIGEN_PARSED_BY_DOXYGEN
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    typename cross_product_return_type<OtherDerived>::type
    cross(const MatrixBase<OtherDerived>& other) const;
-    
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
-    
-    EIGEN_DEVICE_FUNC
    PlainObject unitOrthogonal(void) const;
-    
    Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
    
+    #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
    ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
    // put this as separate enum value to work around possible GCC 4.3 bug (?)
-    enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
-                                          : ColsAtCompileTime==1 ? Vertical : Horizontal };
+    enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal };
    typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
    HomogeneousReturnType homogeneous() const;
+    #endif
    
    enum {
      SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
@@ -444,16 +454,49 @@ template<typename Derived> class MatrixBase
    const MatrixFunctionReturnValue<Derived> sin() const;
    const MatrixSquareRootReturnValue<Derived> sqrt() const;
    const MatrixLogarithmReturnValue<Derived> log() const;
-    const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
-    const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
+
+#ifdef EIGEN2_SUPPORT
+    template<typename ProductDerived, typename Lhs, typename Rhs>
+    Derived& operator+=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
+                                      EvalBeforeAssigningBit>& other);
+
+    template<typename ProductDerived, typename Lhs, typename Rhs>
+    Derived& operator-=(const Flagged<ProductBase<ProductDerived, Lhs,Rhs>, 0,
+                                      EvalBeforeAssigningBit>& other);
+
+    /** \deprecated because .lazy() is deprecated
+      * Overloaded for cache friendly product evaluation */
+    template<typename OtherDerived>
+    Derived& lazyAssign(const Flagged<OtherDerived, 0, EvalBeforeAssigningBit>& other)
+    { return lazyAssign(other._expression()); }
+
+    template<unsigned int Added>
+    const Flagged<Derived, Added, 0> marked() const;
+    const Flagged<Derived, 0, EvalBeforeAssigningBit> lazy() const;
+
+    inline const Cwise<Derived> cwise() const;
+    inline Cwise<Derived> cwise();
+
+    VectorBlock<Derived> start(Index size);
+    const VectorBlock<const Derived> start(Index size) const;
+    VectorBlock<Derived> end(Index size);
+    const VectorBlock<const Derived> end(Index size) const;
+    template<int Size> VectorBlock<Derived,Size> start();
+    template<int Size> const VectorBlock<const Derived,Size> start() const;
+    template<int Size> VectorBlock<Derived,Size> end();
+    template<int Size> const VectorBlock<const Derived,Size> end() const;
+
+    Minor<Derived> minor(Index row, Index col);
+    const Minor<Derived> minor(Index row, Index col) const;
+#endif

  protected:
-    EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
+    MatrixBase() : Base() {}

  private:
-    EIGEN_DEVICE_FUNC explicit MatrixBase(int);
-    EIGEN_DEVICE_FUNC MatrixBase(int,int);
-    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
+    explicit MatrixBase(int);
+    MatrixBase(int,int);
+    template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
  protected:
    // mixing arrays and matrices is not legal
    template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
@@ -463,51 +506,6 @@ template<typename Derived> class MatrixBase
    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
 };

-
-/***************************************************************************
-* Implementation of matrix base methods
-***************************************************************************/
-
-/** replaces \c *this by \c *this * \a other.
-  *
-  * \returns a reference to \c *this
-  *
-  * Example: \include MatrixBase_applyOnTheRight.cpp
-  * Output: \verbinclude MatrixBase_applyOnTheRight.out
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline Derived&
-MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheRight(derived());
-  return derived();
-}
-
-/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
-  *
-  * Example: \include MatrixBase_applyOnTheRight.cpp
-  * Output: \verbinclude MatrixBase_applyOnTheRight.out
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheRight(derived());
-}
-
-/** replaces \c *this by \a other * \c *this.
-  *
-  * Example: \include MatrixBase_applyOnTheLeft.cpp
-  * Output: \verbinclude MatrixBase_applyOnTheLeft.out
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheLeft(derived());
-}
-
 } // end namespace Eigen

 #endif // EIGEN_MATRIXBASE_H
--- a/Eigen/src/Core/NestByValue.h
+++ b/Eigen/src/Core/NestByValue.h
@@ -40,29 +40,29 @@ template<typename ExpressionType> class NestByValue
    typedef typename internal::dense_xpr_base<NestByValue>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)

-    EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
+    inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
-    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
-    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
+    inline Index rows() const { return m_expression.rows(); }
+    inline Index cols() const { return m_expression.cols(); }
+    inline Index outerStride() const { return m_expression.outerStride(); }
+    inline Index innerStride() const { return m_expression.innerStride(); }

-    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
+    inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
    }

-    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
+    inline Scalar& coeffRef(Index row, Index col)
    {
      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
+    inline const CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }

-    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
+    inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }
@@ -91,7 +91,7 @@ template<typename ExpressionType> class NestByValue
      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
    }

-    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
+    operator const ExpressionType&() const { return m_expression; }

  protected:
    const ExpressionType m_expression;
--- a/Eigen/src/Core/NoAlias.h
+++ b/Eigen/src/Core/NoAlias.h
@@ -30,41 +30,58 @@ namespace Eigen {
 template<typename ExpressionType, template <typename> class StorageBase>
 class NoAlias
 {
-  public:
    typedef typename ExpressionType::Scalar Scalar;
-    
-    explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
-    
+  public:
+    NoAlias(ExpressionType& expression) : m_expression(expression) {}
+
+    /** Behaves like MatrixBase::lazyAssign(other)
+      * \sa MatrixBase::lazyAssign() */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
-    {
-      call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar>());
-      return m_expression;
-    }
-    
+    { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
+
+    /** \sa MatrixBase::operator+= */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
    {
-      call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar>());
-      return m_expression;
-    }
-    
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
-    {
-      call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar>());
+      typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
+      SelfAdder tmp(m_expression);
+      typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
+      typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
+      internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
      return m_expression;
    }

-    EIGEN_DEVICE_FUNC
-    ExpressionType& expression() const
+    /** \sa MatrixBase::operator-= */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
    {
+      typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
+      SelfAdder tmp(m_expression);
+      typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
+      typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
+      internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
      return m_expression;
    }

+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+    { other.derived().addTo(m_expression); return m_expression; }
+
+    template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+    { other.derived().subTo(m_expression); return m_expression; }
+
+    template<typename Lhs, typename Rhs, int NestingFlags>
+    EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
+    { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
+
+    template<typename Lhs, typename Rhs, int NestingFlags>
+    EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
+    { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
+#endif
+
  protected:
    ExpressionType& m_expression;
 };
@@ -100,7 +117,7 @@ class NoAlias
 template<typename Derived>
 NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
 {
-  return NoAlias<Derived, Eigen::MatrixBase >(derived());
+  return derived();
 }

 } // end namespace Eigen
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -68,40 +68,21 @@ template<typename T> struct GenericNumTraits
                   >::type NonInteger;
  typedef T Nested;

-  EIGEN_DEVICE_FUNC
-  static inline Real epsilon()
-  {
-    #if defined(__CUDA_ARCH__)
-    return internal::device::numeric_limits<T>::epsilon();
-    #else
-    return std::numeric_limits<T>::epsilon();
-    #endif
-  }
-  EIGEN_DEVICE_FUNC
+  static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
  static inline Real dummy_precision()
  {
    // make sure to override this for floating-point types
    return Real(0);
  }
-
-
-  EIGEN_DEVICE_FUNC
-  static inline T highest() {
-#if defined(__CUDA_ARCH__)
-    return (internal::device::numeric_limits<T>::max)();
-#else
-    return (std::numeric_limits<T>::max)();
+  static inline T highest() { return (std::numeric_limits<T>::max)(); }
+  static inline T lowest()  { return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)()); }
+  
+#ifdef EIGEN2_SUPPORT
+  enum {
+    HasFloatingPoint = !IsInteger
+  };
+  typedef NonInteger FloatingPoint;
 #endif
-  }
-
-  EIGEN_DEVICE_FUNC
-  static inline T lowest()  {
-#if defined(__CUDA_ARCH__)
-    return IsInteger ? (internal::device::numeric_limits<T>::min)() : (-(internal::device::numeric_limits<T>::max)());
-#else
-    return IsInteger ? (std::numeric_limits<T>::min)() : (-(std::numeric_limits<T>::max)());
-#endif
-  }
 };

 template<typename T> struct NumTraits : GenericNumTraits<T>
@@ -110,13 +91,11 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
 template<> struct NumTraits<float>
  : GenericNumTraits<float>
 {
-  EIGEN_DEVICE_FUNC
  static inline float dummy_precision() { return 1e-5f; }
 };

 template<> struct NumTraits<double> : GenericNumTraits<double>
 {
-  EIGEN_DEVICE_FUNC
  static inline double dummy_precision() { return 1e-12; }
 };

@@ -161,9 +140,6 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
  };
-  
-  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
-  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
 };

 } // end namespace Eigen
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -13,8 +13,7 @@

 namespace Eigen { 

-// TODO: this does not seems to be needed at all:
-// template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
+template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;

 /** \class PermutationBase
  * \ingroup Core_Module
@@ -42,6 +41,10 @@ namespace Eigen {

 namespace internal {

+template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
+struct permut_matrix_product_retval;
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false>
+struct permut_sparsematrix_product_retval;
 enum PermPermProduct_t {PermPermProduct};

 } // end namespace internal
@@ -57,18 +60,19 @@ class PermutationBase : public EigenBase<Derived>
    typedef typename Traits::IndicesType IndicesType;
    enum {
      Flags = Traits::Flags,
+      CoeffReadCost = Traits::CoeffReadCost,
      RowsAtCompileTime = Traits::RowsAtCompileTime,
      ColsAtCompileTime = Traits::ColsAtCompileTime,
      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
    };
-    typedef typename Traits::StorageIndex StorageIndex;
-    typedef Matrix<StorageIndex,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
+    typedef typename Traits::Scalar Scalar;
+    typedef typename Traits::Index Index;
+    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
            DenseMatrixType;
-    typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndex>
+    typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,Index>
            PlainPermutationType;
    using Base::derived;
-    typedef Transpose<PermutationBase> TransposeReturnType;
    #endif

    /** Copies the other permutation into *this */
@@ -114,7 +118,7 @@ class PermutationBase : public EigenBase<Derived>
    void evalTo(MatrixBase<DenseDerived>& other) const
    {
      other.setZero();
-      for (Index i=0; i<rows(); ++i)
+      for (int i=0; i<rows();++i)
        other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
    }
    #endif
@@ -135,24 +139,23 @@ class PermutationBase : public EigenBase<Derived>

    /** Resizes to given size.
      */
-    inline void resize(Index newSize)
+    inline void resize(Index size)
    {
-      indices().resize(newSize);
+      indices().resize(size);
    }

    /** Sets *this to be the identity permutation matrix */
    void setIdentity()
    {
-      StorageIndex n = StorageIndex(size());
-      for(StorageIndex i = 0; i < n; ++i)
+      for(Index i = 0; i < size(); ++i)
        indices().coeffRef(i) = i;
    }

    /** Sets *this to be the identity permutation matrix of given size.
      */
-    void setIdentity(Index newSize)
+    void setIdentity(Index size)
    {
-      resize(newSize);
+      resize(size);
      setIdentity();
    }

@@ -160,18 +163,18 @@ class PermutationBase : public EigenBase<Derived>
      *
      * \returns a reference to *this.
      *
-      * \warning This is much slower than applyTranspositionOnTheRight(Index,Index):
+      * \warning This is much slower than applyTranspositionOnTheRight(int,int):
      * this has linear complexity and requires a lot of branching.
      *
-      * \sa applyTranspositionOnTheRight(Index,Index)
+      * \sa applyTranspositionOnTheRight(int,int)
      */
    Derived& applyTranspositionOnTheLeft(Index i, Index j)
    {
      eigen_assert(i>=0 && j>=0 && i<size() && j<size());
      for(Index k = 0; k < size(); ++k)
      {
-        if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndex(j);
-        else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndex(i);
+        if(indices().coeff(k) == i) indices().coeffRef(k) = j;
+        else if(indices().coeff(k) == j) indices().coeffRef(k) = i;
      }
      return derived();
    }
@@ -182,7 +185,7 @@ class PermutationBase : public EigenBase<Derived>
      *
      * This is a fast operation, it only consists in swapping two indices.
      *
-      * \sa applyTranspositionOnTheLeft(Index,Index)
+      * \sa applyTranspositionOnTheLeft(int,int)
      */
    Derived& applyTranspositionOnTheRight(Index i, Index j)
    {
@@ -195,14 +198,14 @@ class PermutationBase : public EigenBase<Derived>
      *
      * \note \note_try_to_help_rvo
      */
-    inline TransposeReturnType inverse() const
-    { return TransposeReturnType(derived()); }
+    inline Transpose<PermutationBase> inverse() const
+    { return derived(); }
    /** \returns the tranpose permutation matrix.
      *
      * \note \note_try_to_help_rvo
      */
-    inline TransposeReturnType transpose() const
-    { return TransposeReturnType(derived()); }
+    inline Transpose<PermutationBase> transpose() const
+    { return derived(); }

    /**** multiplication helpers to hopefully get RVO ****/

@@ -212,13 +215,13 @@ class PermutationBase : public EigenBase<Derived>
    template<typename OtherDerived>
    void assignTranspose(const PermutationBase<OtherDerived>& other)
    {
-      for (Index i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
+      for (int i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
    }
    template<typename Lhs,typename Rhs>
    void assignProduct(const Lhs& lhs, const Rhs& rhs)
    {
      eigen_assert(lhs.cols() == rhs.rows());
-      for (Index i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
+      for (int i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
    }
 #endif

@@ -247,35 +250,6 @@ class PermutationBase : public EigenBase<Derived>
    template<typename Other> friend
    inline PlainPermutationType operator*(const Transpose<PermutationBase<Other> >& other, const PermutationBase& perm)
    { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
-    
-    /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
-      *
-      * This function is O(\c n) procedure allocating a buffer of \c n booleans.
-      */
-    Index determinant() const
-    {
-      Index res = 1;
-      Index n = size();
-      Matrix<bool,RowsAtCompileTime,1,0,MaxRowsAtCompileTime> mask(n);
-      mask.fill(false);
-      Index r = 0;
-      while(r < n)
-      {
-        // search for the next seed
-        while(r<n && mask[r]) r++;
-        if(r>=n)
-          break;
-        // we got one, let's follow it until we are back to the seed
-        Index k0 = r++;
-        mask.coeffRef(k0) = true;
-        for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k))
-        {
-          mask.coeffRef(k) = true;
-          res = -res;
-        }
-      }
-      return res;
-    }

  protected:

@@ -288,7 +262,7 @@ class PermutationBase : public EigenBase<Derived>
  *
  * \param SizeAtCompileTime the number of rows/cols, or Dynamic
  * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
-  * \param StorageIndex the integer type of the indices
+  * \param IndexType the interger type of the indices
  *
  * This class represents a permutation matrix, internally stored as a vector of integers.
  *
@@ -296,28 +270,24 @@ class PermutationBase : public EigenBase<Derived>
  */

 namespace internal {
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
-struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
- : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
+ : traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
-  typedef PermutationStorage StorageKind;
-  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
-  typedef _StorageIndex StorageIndex;
+  typedef IndexType Index;
+  typedef Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
 };
 }

-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
-class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
 {
    typedef PermutationBase<PermutationMatrix> Base;
    typedef internal::traits<PermutationMatrix> Traits;
  public:

-    typedef const PermutationMatrix& Nested;
-
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef typename Traits::IndicesType IndicesType;
-    typedef typename Traits::StorageIndex StorageIndex;
    #endif

    inline PermutationMatrix()
@@ -325,10 +295,8 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile

    /** Constructs an uninitialized permutation matrix of given size.
      */
-    explicit inline PermutationMatrix(Index size) : m_indices(size)
-    {
-      eigen_internal_assert(size <= NumTraits<StorageIndex>::highest());
-    }
+    inline PermutationMatrix(int size) : m_indices(size)
+    {}

    /** Copy constructor. */
    template<typename OtherDerived>
@@ -397,12 +365,9 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename Other>
    PermutationMatrix(const Transpose<PermutationBase<Other> >& other)
-      : m_indices(other.nestedExpression().size())
+      : m_indices(other.nestedPermutation().size())
    {
-      eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());
-      StorageIndex end = StorageIndex(m_indices.size());
-      for (StorageIndex i=0; i<end;++i)
-        m_indices.coeffRef(other.nestedExpression().indices().coeff(i)) = i;
+      for (int i=0; i<m_indices.size();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
    }
    template<typename Lhs,typename Rhs>
    PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
@@ -419,19 +384,18 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile


 namespace internal {
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
-struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
- : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
+ : traits<Matrix<IndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
-  typedef PermutationStorage StorageKind;
-  typedef Map<const Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
-  typedef _StorageIndex StorageIndex;
+  typedef IndexType Index;
+  typedef Map<const Matrix<IndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
 };
 }

-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
-class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess>
-  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess>
+  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType>,_PacketAccess> >
 {
    typedef PermutationBase<Map> Base;
    typedef internal::traits<Map> Traits;
@@ -439,15 +403,15 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    typedef typename Traits::IndicesType IndicesType;
-    typedef typename IndicesType::Scalar StorageIndex;
+    typedef typename IndicesType::Scalar Index;
    #endif

-    inline Map(const StorageIndex* indicesPtr)
-      : m_indices(indicesPtr)
+    inline Map(const Index* indices)
+      : m_indices(indices)
    {}

-    inline Map(const StorageIndex* indicesPtr, Index size)
-      : m_indices(indicesPtr,size)
+    inline Map(const Index* indices, Index size)
+      : m_indices(indices,size)
    {}

    /** Copies the other permutation into *this */
@@ -493,6 +457,8 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
  * \sa class PermutationBase, class PermutationMatrix
  */

+struct PermutationStorage {};
+
 template<typename _IndicesType> class TranspositionsWrapper;
 namespace internal {
 template<typename _IndicesType>
@@ -500,14 +466,15 @@ struct traits<PermutationWrapper<_IndicesType> >
 {
  typedef PermutationStorage StorageKind;
  typedef typename _IndicesType::Scalar Scalar;
-  typedef typename _IndicesType::Scalar StorageIndex;
+  typedef typename _IndicesType::Scalar Index;
  typedef _IndicesType IndicesType;
  enum {
    RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
    ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
-    MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
-    Flags = 0
+    MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime,
+    Flags = 0,
+    CoeffReadCost = _IndicesType::CoeffReadCost
  };
 };
 }
@@ -536,33 +503,103 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
    typename IndicesType::Nested m_indices;
 };

-
 /** \returns the matrix with the permutation applied to the columns.
  */
-template<typename MatrixDerived, typename PermutationDerived>
-EIGEN_DEVICE_FUNC
-const Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
-operator*(const MatrixBase<MatrixDerived> &matrix,
-          const PermutationBase<PermutationDerived>& permutation)
+template<typename Derived, typename PermutationDerived>
+inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight>
+operator*(const MatrixBase<Derived>& matrix,
+          const PermutationBase<PermutationDerived> &permutation)
 {
-  return Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
-            (matrix.derived(), permutation.derived());
+  return internal::permut_matrix_product_retval
+           <PermutationDerived, Derived, OnTheRight>
+           (permutation.derived(), matrix.derived());
 }

 /** \returns the matrix with the permutation applied to the rows.
  */
-template<typename PermutationDerived, typename MatrixDerived>
-EIGEN_DEVICE_FUNC
-const Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
+template<typename Derived, typename PermutationDerived>
+inline const internal::permut_matrix_product_retval
+               <PermutationDerived, Derived, OnTheLeft>
 operator*(const PermutationBase<PermutationDerived> &permutation,
-          const MatrixBase<MatrixDerived>& matrix)
+          const MatrixBase<Derived>& matrix)
 {
-  return Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
-            (permutation.derived(), matrix.derived());
+  return internal::permut_matrix_product_retval
+           <PermutationDerived, Derived, OnTheLeft>
+           (permutation.derived(), matrix.derived());
 }

 namespace internal {

+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
+{
+  typedef typename MatrixType::PlainObject ReturnType;
+};
+
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+struct permut_matrix_product_retval
+ : public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
+{
+    typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
+
+    permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
+      : m_permutation(perm), m_matrix(matrix)
+    {}
+
+    inline int rows() const { return m_matrix.rows(); }
+    inline int cols() const { return m_matrix.cols(); }
+
+    template<typename Dest> inline void evalTo(Dest& dst) const
+    {
+      const int n = Side==OnTheLeft ? rows() : cols();
+
+      if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
+      {
+        // apply the permutation inplace
+        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
+        mask.fill(false);
+        int r = 0;
+        while(r < m_permutation.size())
+        {
+          // search for the next seed
+          while(r<m_permutation.size() && mask[r]) r++;
+          if(r>=m_permutation.size())
+            break;
+          // we got one, let's follow it until we are back to the seed
+          int k0 = r++;
+          int kPrev = k0;
+          mask.coeffRef(k0) = true;
+          for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+          {
+                  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+            .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
+
+            mask.coeffRef(k) = true;
+            kPrev = k;
+          }
+        }
+      }
+      else
+      {
+        for(int i = 0; i < n; ++i)
+        {
+          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+               (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
+
+          =
+
+          Block<const MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
+               (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
+        }
+      }
+    }
+
+  protected:
+    const PermutationType& m_permutation;
+    typename MatrixType::Nested m_matrix;
+};
+
 /* Template partial specialization for transposed/inverse permutations */

 template<typename Derived>
@@ -572,8 +609,6 @@ struct traits<Transpose<PermutationBase<Derived> > >

 } // end namespace internal

-// TODO: the specificties should be handled by the evaluator,
-// at the very least we should only specialize TransposeImpl
 template<typename Derived>
 class Transpose<PermutationBase<Derived> >
  : public EigenBase<Transpose<PermutationBase<Derived> > >
@@ -588,26 +623,26 @@ class Transpose<PermutationBase<Derived> >
    typedef typename Derived::DenseMatrixType DenseMatrixType;
    enum {
      Flags = Traits::Flags,
+      CoeffReadCost = Traits::CoeffReadCost,
      RowsAtCompileTime = Traits::RowsAtCompileTime,
      ColsAtCompileTime = Traits::ColsAtCompileTime,
      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
    };
    typedef typename Traits::Scalar Scalar;
-    typedef typename Traits::StorageIndex StorageIndex;
    #endif

    Transpose(const PermutationType& p) : m_permutation(p) {}

-    inline Index rows() const { return m_permutation.rows(); }
-    inline Index cols() const { return m_permutation.cols(); }
+    inline int rows() const { return m_permutation.rows(); }
+    inline int cols() const { return m_permutation.cols(); }

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename DenseDerived>
    void evalTo(MatrixBase<DenseDerived>& other) const
    {
      other.setZero();
-      for (Index i=0; i<rows();++i)
+      for (int i=0; i<rows();++i)
        other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
    }
    #endif
@@ -620,22 +655,22 @@ class Transpose<PermutationBase<Derived> >
    /** \returns the matrix with the inverse permutation applied to the columns.
      */
    template<typename OtherDerived> friend
-    const Product<OtherDerived, Transpose, AliasFreeProduct>
+    inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>
    operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
    {
-      return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trPerm.derived());
+      return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
    }

    /** \returns the matrix with the inverse permutation applied to the rows.
      */
    template<typename OtherDerived>
-    const Product<Transpose, OtherDerived, AliasFreeProduct>
+    inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>
    operator*(const MatrixBase<OtherDerived>& matrix) const
    {
-      return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
+      return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived());
    }

-    const PermutationType& nestedExpression() const { return m_permutation; }
+    const PermutationType& nestedPermutation() const { return m_permutation; }

  protected:
    const PermutationType& m_permutation;
@@ -647,12 +682,6 @@ const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() con
  return derived();
 }

-namespace internal {
-
-template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };
-
-} // end namespace internal
-
 } // end namespace Eigen

 #endif // EIGEN_PERMUTATIONMATRIX_H
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -11,48 +11,30 @@
 #ifndef EIGEN_DENSESTORAGEBASE_H
 #define EIGEN_DENSESTORAGEBASE_H

-#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
-# define EIGEN_INITIALIZE_COEFFS
-# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
-#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
-# define EIGEN_INITIALIZE_COEFFS
-# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
+#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
 #else
-# undef EIGEN_INITIALIZE_COEFFS
-# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
 #endif

 namespace Eigen {

 namespace internal {

-template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
-  template<typename Index>
-  EIGEN_DEVICE_FUNC
-  static EIGEN_ALWAYS_INLINE void run(Index, Index)
-  {
-  }
-};
+template<typename Index>
+EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
+{
+  // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
+  // we assume Index is signed
+  Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
+  bool error = (rows < 0  || cols < 0)  ? true
+             : (rows == 0 || cols == 0) ? false
+                                        : (rows > max_index / cols);
+  if (error)
+    throw_std_bad_alloc();
+}

-template<> struct check_rows_cols_for_overflow<Dynamic> {
-  template<typename Index>
-  EIGEN_DEVICE_FUNC
-  static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
-  {
-    // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
-    // we assume Index is signed
-    Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
-    bool error = (rows == 0 || cols == 0) ? false
-               : (rows > max_index / cols);
-    if (error)
-      throw_std_bad_alloc();
-  }
-};
-
-template <typename Derived,
-          typename OtherDerived = Derived,
-          bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
-struct conservative_resize_like_impl;
+template <typename Derived, typename OtherDerived = Derived, bool IsVector = bool(Derived::IsVectorAtCompileTime)> struct conservative_resize_like_impl;

 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;

@@ -69,9 +51,8 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
 #ifdef EIGEN_PARSED_BY_DOXYGEN
 namespace internal {

-// this is a workaround to doxygen not being able to understand the inheritance logic
+// this is a warkaround to doxygen not being able to understand the inheritence logic
 // when it is hidden by the dense_xpr_base helper struct.
-/** This class is just a workaround for Doxygen and it does not not actually exist. */
 template<typename Derived> struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase<Derived> {};
 /** This class is just a workaround for Doxygen and it does not not actually exist. */
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
@@ -96,8 +77,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    typedef typename internal::dense_xpr_base<Derived>::type Base;

    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
-    
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
    typedef Derived DenseType;
@@ -116,75 +97,62 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    typedef Eigen::Map<Derived, Unaligned>  MapType;
    friend  class Eigen::Map<const Derived, Unaligned>;
    typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
-#if EIGEN_MAX_ALIGN_BYTES>0
-    // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
-    friend  class Eigen::Map<Derived, AlignedMax>;
-    friend  class Eigen::Map<const Derived, AlignedMax>;
-#endif
-    typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
-    typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
+    friend  class Eigen::Map<Derived, Aligned>;
+    typedef Eigen::Map<Derived, Aligned> AlignedMapType;
+    friend  class Eigen::Map<const Derived, Aligned>;
+    typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
    template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
    template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
-    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
-    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
+    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
+    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };

  protected:
    DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;

  public:
-    enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
+    enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

-    EIGEN_DEVICE_FUNC
    Base& base() { return *static_cast<Base*>(this); }
-    EIGEN_DEVICE_FUNC
    const Base& base() const { return *static_cast<const Base*>(this); }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }

-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
+    EIGEN_STRONG_INLINE const Scalar& coeff(Index row, Index col) const
    {
      if(Flags & RowMajorBit)
-        return m_storage.data()[colId + rowId * m_storage.cols()];
+        return m_storage.data()[col + row * m_storage.cols()];
      else // column-major
-        return m_storage.data()[rowId + colId * m_storage.rows()];
+        return m_storage.data()[row + col * m_storage.rows()];
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
    {
      return m_storage.data()[index];
    }

-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
    {
      if(Flags & RowMajorBit)
-        return m_storage.data()[colId + rowId * m_storage.cols()];
+        return m_storage.data()[col + row * m_storage.cols()];
      else // column-major
-        return m_storage.data()[rowId + colId * m_storage.rows()];
+        return m_storage.data()[row + col * m_storage.rows()];
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
    {
      return m_storage.data()[index];
    }

-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
+    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index row, Index col) const
    {
      if(Flags & RowMajorBit)
-        return m_storage.data()[colId + rowId * m_storage.cols()];
+        return m_storage.data()[col + row * m_storage.cols()];
      else // column-major
-        return m_storage.data()[rowId + colId * m_storage.rows()];
+        return m_storage.data()[row + col * m_storage.rows()];
    }

-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
    {
      return m_storage.data()[index];
@@ -192,12 +160,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type

    /** \internal */
    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
    {
      return internal::ploadt<PacketScalar, LoadMode>
               (m_storage.data() + (Flags & RowMajorBit
-                                   ? colId + rowId * m_storage.cols()
-                                   : rowId + colId * m_storage.rows()));
+                                   ? col + row * m_storage.cols()
+                                   : row + col * m_storage.rows()));
    }

    /** \internal */
@@ -209,27 +177,27 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type

    /** \internal */
    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val)
+    EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketScalar& x)
    {
      internal::pstoret<Scalar, PacketScalar, StoreMode>
              (m_storage.data() + (Flags & RowMajorBit
-                                   ? colId + rowId * m_storage.cols()
-                                   : rowId + colId * m_storage.rows()), val);
+                                   ? col + row * m_storage.cols()
+                                   : row + col * m_storage.rows()), x);
    }

    /** \internal */
    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val)
+    EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& x)
    {
-      internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, val);
+      internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, x);
    }

    /** \returns a const pointer to the data array of this matrix */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
+    EIGEN_STRONG_INLINE const Scalar *data() const
    { return m_storage.data(); }

    /** \returns a pointer to the data array of this matrix */
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
+    EIGEN_STRONG_INLINE Scalar *data()
    { return m_storage.data(); }

    /** Resizes \c *this to a \a rows x \a cols matrix.
@@ -248,22 +216,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
    {
-      eigen_assert(   EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
-                   && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
-                   && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
-                   && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
-                   && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
-      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
-      #ifdef EIGEN_INITIALIZE_COEFFS
+      #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+        internal::check_rows_cols_for_overflow(rows, cols);
        Index size = rows*cols;
        bool size_changed = size != this->size();
        m_storage.resize(size, rows, cols);
-        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+        if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
      #else
-        internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
+        internal::check_rows_cols_for_overflow(rows, cols);
        m_storage.resize(rows*cols, rows, cols);
      #endif
    }
@@ -279,20 +241,19 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
      */
-    EIGEN_DEVICE_FUNC
    inline void resize(Index size)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
-      eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
-      #ifdef EIGEN_INITIALIZE_COEFFS
+      eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
+      #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
        bool size_changed = size != this->size();
      #endif
      if(RowsAtCompileTime == 1)
        m_storage.resize(size, 1, size);
      else
        m_storage.resize(size, size, 1);
-      #ifdef EIGEN_INITIALIZE_COEFFS
-        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+        if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
      #endif
    }

@@ -304,7 +265,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index,Index)
      */
-    EIGEN_DEVICE_FUNC
    inline void resize(NoChange_t, Index cols)
    {
      resize(rows(), cols);
@@ -318,7 +278,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \sa resize(Index,Index)
      */
-    EIGEN_DEVICE_FUNC
    inline void resize(Index rows, NoChange_t)
    {
      resize(rows, cols());
@@ -332,11 +291,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
    {
      const OtherDerived& other = _other.derived();
-      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
+      internal::check_rows_cols_for_overflow(other.rows(), other.cols());
      const Index othersize = other.rows()*other.cols();
      if(RowsAtCompileTime == 1)
      {
@@ -360,7 +318,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * Matrices are resized relative to the top-left element. In case values need to be 
      * appended to the matrix they will be uninitialized.
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
    {
      internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
@@ -373,7 +330,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * In case the matrix is growing, new rows will be uninitialized.
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
    {
      // Note: see the comment in conservativeResize(Index,Index)
@@ -387,7 +343,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * In case the matrix is growing, new columns will be uninitialized.
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
    {
      // Note: see the comment in conservativeResize(Index,Index)
@@ -402,7 +357,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * When values are appended, they will be uninitialized.
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResize(Index size)
    {
      internal::conservative_resize_like_impl<Derived>::run(*this, size);
@@ -418,7 +372,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * appended to the matrix they will copied from \c other.
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
    {
      internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
@@ -427,7 +380,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
    {
      return _set(other);
@@ -435,7 +387,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type

    /** \sa MatrixBase::lazyAssign() */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
    {
      _resize_to_match(other);
@@ -443,63 +394,38 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    }

    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
    {
      resize(func.rows(), func.cols());
      return Base::operator=(func);
    }

-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
+    EIGEN_STRONG_INLINE explicit PlainObjectBase() : m_storage()
    {
 //       _check_template_params();
-//       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+//       EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
    // FIXME is it still needed ?
    /** \internal */
-    EIGEN_DEVICE_FUNC
-    explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+    PlainObjectBase(internal::constructor_without_unaligned_array_assert)
      : m_storage(internal::constructor_without_unaligned_array_assert())
    {
-//       _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+//       _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }
 #endif

-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    EIGEN_DEVICE_FUNC
-    PlainObjectBase(PlainObjectBase&& other)
-      : m_storage( std::move(other.m_storage) )
-    {
-    }
-
-    EIGEN_DEVICE_FUNC
-    PlainObjectBase& operator=(PlainObjectBase&& other)
-    {
-      using std::swap;
-      swap(m_storage, other.m_storage);
-      return *this;
-    }
-#endif
-
-    /** Copy constructor */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
-      : Base(), m_storage(other.m_storage) { }
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
      : m_storage(size, rows, cols)
    {
 //       _check_template_params();
-//       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+//       EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }

    /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
    {
      _resize_to_match(other);
@@ -507,36 +433,14 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      return this->derived();
    }

-    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
+    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
-      : m_storage()
-    {
-      _check_template_params();
-      resizeLike(other);
-      _set_noalias(other);
-    }
-
-    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
-      : m_storage()
+      : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
    {
      _check_template_params();
-      resizeLike(other);
-      *this = other.derived();
-    }
-    /** \brief Copy constructor with in-place evaluation */
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
-    {
-      _check_template_params();
-      // FIXME this does not automatically transpose vectors if necessary
-      resize(other.rows(), other.cols());
-      other.evalTo(this->derived());
+      internal::check_rows_cols_for_overflow(other.derived().rows(), other.derived().cols());
+      Base::operator=(other.derived());
    }

    /** \name Map
@@ -613,16 +517,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
    //@}

    using Base::setConstant;
-    EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
-    EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
+    Derived& setConstant(Index size, const Scalar& value);
+    Derived& setConstant(Index rows, Index cols, const Scalar& value);

    using Base::setZero;
-    EIGEN_DEVICE_FUNC Derived& setZero(Index size);
-    EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
+    Derived& setZero(Index size);
+    Derived& setZero(Index rows, Index cols);

    using Base::setOnes;
-    EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
-    EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
+    Derived& setOnes(Index size);
+    Derived& setOnes(Index rows, Index cols);

    using Base::setRandom;
    Derived& setRandom(Index size);
@@ -641,7 +545,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
    {
      #ifdef EIGEN_NO_AUTOMATIC_RESIZING
@@ -668,23 +571,25 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      *
      * \internal
      */
-    // aliasing is dealt once in internall::call_assignment
-    // so at this stage we have to assume aliasing... and resising has to be done later.
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
    {
-      internal::call_assignment(this->derived(), other.derived());
+      _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
      return this->derived();
    }

+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
+
    /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
      * is the case when creating a new matrix) so one can enforce lazy evaluation.
      *
      * \sa operator=(const MatrixBase<OtherDerived>&), _set()
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
    EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
    {
      // I don't think we need this resize call since the lazyAssign will anyways resize
@@ -692,166 +597,44 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
      //_resize_to_match(other);
      // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
      // it wouldn't allow to copy a row-vector into a column-vector.
-      internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar>());
-      return this->derived();
+      return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
    }

    template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
    {
      EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
                          bool(NumTraits<T1>::IsInteger),
                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
-      resize(rows,cols);
+      eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+             && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+      internal::check_rows_cols_for_overflow(rows, cols);      
+      m_storage.resize(rows*cols,rows,cols);
+      EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
    }
-    
    template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC 
-    EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
+    EIGEN_STRONG_INLINE void _init2(const Scalar& x, const Scalar& y, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
    {
      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
-      m_storage.data()[0] = val0;
-      m_storage.data()[1] = val1;
-    }
-    
-    template<typename T0, typename T1>
-    EIGEN_DEVICE_FUNC 
-    EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
-                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
-                                                                  && (internal::is_same<T0,Index>::value)
-                                                                  && (internal::is_same<T1,Index>::value)
-                                                                  && Base::SizeAtCompileTime==2,T1>::type* = 0)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
-      m_storage.data()[0] = Scalar(val0);
-      m_storage.data()[1] = Scalar(val1);
+      m_storage.data()[0] = x;
+      m_storage.data()[1] = y;
    }

-    // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array,
-    // then the argument is meant to be the size of the object.
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<    (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
-                                                                              && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
-    {
-      // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
-      const bool is_integer = NumTraits<T>::IsInteger;
-      EIGEN_STATIC_ASSERT(is_integer,
-                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
-      resize(size);
-    }
-    
-    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted)
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
-      m_storage.data()[0] = val0;
-    }
-    
-    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type)
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const Index& val0,
-                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
-                                                                  && (internal::is_same<Index,T>::value)
-                                                                  && Base::SizeAtCompileTime==1
-                                                                  && internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
-      m_storage.data()[0] = Scalar(val0);
-    }
-
-    // Initialize a fixed size matrix from a pointer to raw data
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const Scalar* data){
-      this->_set_noalias(ConstMapType(data));
-    }
-
-    // Initialize an arbitrary matrix from a dense expression
-    template<typename T, typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
-      this->_set_noalias(other);
-    }
-
-    // Initialize an arbitrary matrix from a generic Eigen expression
-    template<typename T, typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
-      this->derived() = other;
-    }
-
-    template<typename T, typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
-    {
-      resize(other.rows(), other.cols());
-      other.evalTo(this->derived());
-    }
-
-    template<typename T, typename OtherDerived, int ColsAtCompileTime>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
-    {
-      this->derived() = r;
-    }
-    
-    // For fixed -size arrays:
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
-                                    typename internal::enable_if<    Base::SizeAtCompileTime!=Dynamic
-                                                                  && Base::SizeAtCompileTime!=1
-                                                                  && internal::is_convertible<T, Scalar>::value
-                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
-    {
-      Base::setConstant(val0);
-    }
-    
-    template<typename T>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(const Index& val0,
-                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
-                                                                  && (internal::is_same<Index,T>::value)
-                                                                  && Base::SizeAtCompileTime!=Dynamic
-                                                                  && Base::SizeAtCompileTime!=1
-                                                                  && internal::is_convertible<T, Scalar>::value
-                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
-    {
-      Base::setConstant(val0);
-    }
-    
    template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
    friend struct internal::matrix_swap_impl;

-  public:
-    
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-    /** \internal
-      * \brief Override DenseBase::swap() since for dynamic-sized matrices
-      * of same type it is enough to swap the data pointers.
+    /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the
+      * data pointers.
      */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    void swap(DenseBase<OtherDerived> & other)
+    void _swap(DenseBase<OtherDerived> const & other)
    {
      enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
-      internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());
+      internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived());
    }
-    
-    /** \internal
-      * \brief const version forwarded to DenseBase::swap
-      */
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    void swap(DenseBase<OtherDerived> const & other)
-    { Base::swap(other.derived()); }
-    
-    EIGEN_DEVICE_FUNC 
+
+  public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
    static EIGEN_STRONG_INLINE void _check_template_params()
    {
      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
@@ -865,16 +648,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
                        && (Options & (DontAlign|RowMajor)) == Options),
        INVALID_MATRIX_TEMPLATE_PARAMETERS)
    }
-
-    enum { IsPlainObjectBase = 1 };
 #endif
+
+private:
+    enum { ThisConstantIsPrivateInPlainObjectBase };
 };

-namespace internal {
-
 template <typename Derived, typename OtherDerived, bool IsVector>
-struct conservative_resize_like_impl
+struct internal::conservative_resize_like_impl
 {
+  typedef typename Derived::Index Index;
  static void run(DenseBase<Derived>& _this, Index rows, Index cols)
  {
    if (_this.rows() == rows && _this.cols() == cols) return;
@@ -883,7 +666,7 @@ struct conservative_resize_like_impl
    if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
         (!Derived::IsRowMajor && _this.rows() == rows) )  // column-major and we change only the number of columns
    {
-      internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);
+      internal::check_rows_cols_for_overflow(rows, cols);
      _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
    }
    else
@@ -932,14 +715,12 @@ struct conservative_resize_like_impl
  }
 };

-// Here, the specialization for vectors inherits from the general matrix case
-// to allow calling .conservativeResize(rows,cols) on vectors.
+namespace internal {
+
 template <typename Derived, typename OtherDerived>
 struct conservative_resize_like_impl<Derived,OtherDerived,true>
-  : conservative_resize_like_impl<Derived,OtherDerived,false>
 {
-  using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
-  
+  typedef typename Derived::Index Index;
  static void run(DenseBase<Derived>& _this, Index size)
  {
    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
@@ -965,7 +746,6 @@ struct conservative_resize_like_impl<Derived,OtherDerived,true>
 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
 struct matrix_swap_impl
 {
-  EIGEN_DEVICE_FUNC
  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
  {
    a.base().swap(b);
@@ -975,7 +755,6 @@ struct matrix_swap_impl
 template<typename MatrixTypeA, typename MatrixTypeB>
 struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
 {
-  EIGEN_DEVICE_FUNC
  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
  {
    static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -3,16 +3,15 @@
 //
 // Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.

 #ifndef EIGEN_PRODUCT_H
 #define EIGEN_PRODUCT_H

-namespace Eigen {
-
-template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
+template<typename Lhs, typename Rhs> class Product;
+template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;

 /** \class Product
  * \ingroup Core_Module
@@ -23,223 +22,77 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro
  * \param Rhs the type of the right-hand side expression
  *
  * This class represents an expression of the product of two arbitrary matrices.
-  * 
-  * The other template parameters are:
-  * \tparam Option     can be DefaultProduct, AliasFreeProduct, or LazyProduct
  *
  */

-
 namespace internal {
-
-// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
-// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor.
-template<typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape, 
-         typename RhsShape = typename evaluator_traits<Rhs>::Shape >
-struct product_result_scalar
-{
-  typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
-};
-
-template<typename Lhs, typename Rhs, typename RhsShape>
-struct product_result_scalar<Lhs, Rhs, PermutationShape, RhsShape>
-{
-  typedef typename Rhs::Scalar Scalar;
-};
-
-template<typename Lhs, typename Rhs, typename LhsShape>
-  struct product_result_scalar<Lhs, Rhs, LhsShape, PermutationShape>
-{
-  typedef typename Lhs::Scalar Scalar;
-};
-
-template<typename Lhs, typename Rhs, typename RhsShape>
-struct product_result_scalar<Lhs, Rhs, TranspositionsShape, RhsShape>
-{
-  typedef typename Rhs::Scalar Scalar;
-};
-
-template<typename Lhs, typename Rhs, typename LhsShape>
-  struct product_result_scalar<Lhs, Rhs, LhsShape, TranspositionsShape>
-{
-  typedef typename Lhs::Scalar Scalar;
-};
-
-template<typename Lhs, typename Rhs, int Option>
-struct traits<Product<Lhs, Rhs, Option> >
+template<typename Lhs, typename Rhs>
+struct traits<Product<Lhs, Rhs> >
 {
+  typedef MatrixXpr XprKind;
  typedef typename remove_all<Lhs>::type LhsCleaned;
  typedef typename remove_all<Rhs>::type RhsCleaned;
-  typedef traits<LhsCleaned> LhsTraits;
-  typedef traits<RhsCleaned> RhsTraits;
-  
-  typedef MatrixXpr XprKind;
-  
-  typedef typename product_result_scalar<LhsCleaned,RhsCleaned>::Scalar Scalar;
-  typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
-                                                typename RhsTraits::StorageKind,
-                                                internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
-  typedef typename promote_index_type<typename LhsTraits::StorageIndex,
-                                      typename RhsTraits::StorageIndex>::type StorageIndex;
-  
+  typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
+  typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
+                                        typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
+  typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
+                                      typename traits<RhsCleaned>::Index>::type Index;
  enum {
-    RowsAtCompileTime    = LhsTraits::RowsAtCompileTime,
-    ColsAtCompileTime    = RhsTraits::ColsAtCompileTime,
-    MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
-    
-    // FIXME: only needed by GeneralMatrixMatrixTriangular
-    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
-    
-    // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
-    Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
-          : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
-          : (   ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
-             || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
-          : NoPreferredStorageOrderBit
+    RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
+    ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
+    MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
+    Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
+    CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
  };
 };
-
 } // end namespace internal


-template<typename _Lhs, typename _Rhs, int Option>
-class Product : public ProductImpl<_Lhs,_Rhs,Option,
-                                   typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
-                                                                                   typename internal::traits<_Rhs>::StorageKind,
-                                                                                   internal::product_type<_Lhs,_Rhs>::ret>::ret>
+template<typename Lhs, typename Rhs>
+class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+                                                                            typename internal::traits<Rhs>::StorageKind>::ret>
 {
  public:
    
-    typedef _Lhs Lhs;
-    typedef _Rhs Rhs;
-    
    typedef typename ProductImpl<
-        Lhs, Rhs, Option,
-        typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
-                                                        typename internal::traits<Rhs>::StorageKind,
-                                                        internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
+        Lhs, Rhs,
+        typename internal::promote_storage_type<typename Lhs::StorageKind,
+                                                typename Rhs::StorageKind>::ret>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)

-    typedef typename internal::ref_selector<Lhs>::type LhsNested;
-    typedef typename internal::ref_selector<Rhs>::type RhsNested;
+    typedef typename Lhs::Nested LhsNested;
+    typedef typename Rhs::Nested RhsNested;
    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;

-    EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
+    Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
    {
      eigen_assert(lhs.cols() == rhs.rows()
        && "invalid matrix product"
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+    inline Index rows() const { return m_lhs.rows(); }
+    inline Index cols() const { return m_rhs.cols(); }

-    EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
-    EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
+    const LhsNestedCleaned& lhs() const { return m_lhs; }
+    const RhsNestedCleaned& rhs() const { return m_rhs; }

  protected:

-    LhsNested m_lhs;
-    RhsNested m_rhs;
+    const LhsNested m_lhs;
+    const RhsNested m_rhs;
 };

-namespace internal {
-  
-template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
-class dense_product_base
- : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
-{};
-
-/** Convertion to scalar for inner-products */
-template<typename Lhs, typename Rhs, int Option>
-class dense_product_base<Lhs, Rhs, Option, InnerProduct>
- : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
-{
-  typedef Product<Lhs,Rhs,Option> ProductXpr;
-  typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
-public:
-  using Base::derived;
-  typedef typename Base::Scalar Scalar;
-  
-  operator const Scalar() const
-  {
-    return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
-  }
-};
-
-} // namespace internal
-
-// Generic API dispatcher
-template<typename Lhs, typename Rhs, int Option, typename StorageKind>
-class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
+template<typename Lhs, typename Rhs>
+class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
 {
+    typedef Product<Lhs, Rhs> Derived;
  public:
-    typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
-};

-template<typename Lhs, typename Rhs, int Option>
-class ProductImpl<Lhs,Rhs,Option,Dense>
-  : public internal::dense_product_base<Lhs,Rhs,Option>
-{
-    typedef Product<Lhs, Rhs, Option> Derived;
-    
-  public:
-    
-    typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
+    typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-  protected:
-    enum {
-      IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && 
-                   (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
-      EnableCoeff = IsOneByOne || Option==LazyProduct
-    };
-    
-  public:
-  
-    EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
-    {
-      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
-      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
-      
-      return internal::evaluator<Derived>(derived()).coeff(row,col);
-    }
-
-    EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
-    {
-      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
-      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
-      
-      return internal::evaluator<Derived>(derived()).coeff(i);
-    }
-    
-  
 };

-/***************************************************************************
-* Implementation of matrix base methods
-***************************************************************************/
-
-
-/** \internal used to test the evaluator only
-  */
-template<typename Lhs,typename Rhs>
-const Product<Lhs,Rhs>
-prod(const Lhs& lhs, const Rhs& rhs)
-{
-  return Product<Lhs,Rhs>(lhs,rhs);
-}
-
-/** \internal used to test the evaluator only
-  */
-template<typename Lhs,typename Rhs>
-const Product<Lhs,Rhs,LazyProduct>
-lazyprod(const Lhs& lhs, const Rhs& rhs)
-{
-  return Product<Lhs,Rhs,LazyProduct>(lhs,rhs);
-}
-
-} // end namespace Eigen
-
 #endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -0,0 +1,278 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PRODUCTBASE_H
+#define EIGEN_PRODUCTBASE_H
+
+namespace Eigen { 
+
+/** \class ProductBase
+  * \ingroup Core_Module
+  *
+  */
+
+namespace internal {
+template<typename Derived, typename _Lhs, typename _Rhs>
+struct traits<ProductBase<Derived,_Lhs,_Rhs> >
+{
+  typedef MatrixXpr XprKind;
+  typedef typename remove_all<_Lhs>::type Lhs;
+  typedef typename remove_all<_Rhs>::type Rhs;
+  typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
+  typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
+                                           typename traits<Rhs>::StorageKind>::ret StorageKind;
+  typedef typename promote_index_type<typename traits<Lhs>::Index,
+                                         typename traits<Rhs>::Index>::type Index;
+  enum {
+    RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime,
+    ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime,
+    MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime,
+    Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
+          | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
+                  // Note that EvalBeforeNestingBit and NestByRefBit
+                  // are not used in practice because nested is overloaded for products
+    CoeffReadCost = 0 // FIXME why is it needed ?
+  };
+};
+}
+
+#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \
+  typedef ProductBase<Derived, Lhs, Rhs > Base; \
+  EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+  typedef typename Base::LhsNested LhsNested; \
+  typedef typename Base::_LhsNested _LhsNested; \
+  typedef typename Base::LhsBlasTraits LhsBlasTraits; \
+  typedef typename Base::ActualLhsType ActualLhsType; \
+  typedef typename Base::_ActualLhsType _ActualLhsType; \
+  typedef typename Base::RhsNested RhsNested; \
+  typedef typename Base::_RhsNested _RhsNested; \
+  typedef typename Base::RhsBlasTraits RhsBlasTraits; \
+  typedef typename Base::ActualRhsType ActualRhsType; \
+  typedef typename Base::_ActualRhsType _ActualRhsType; \
+  using Base::m_lhs; \
+  using Base::m_rhs;
+
+template<typename Derived, typename Lhs, typename Rhs>
+class ProductBase : public MatrixBase<Derived>
+{
+  public:
+    typedef MatrixBase<Derived> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase)
+    
+    typedef typename Lhs::Nested LhsNested;
+    typedef typename internal::remove_all<LhsNested>::type _LhsNested;
+    typedef internal::blas_traits<_LhsNested> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType;
+    typedef typename internal::traits<Lhs>::Scalar LhsScalar;
+
+    typedef typename Rhs::Nested RhsNested;
+    typedef typename internal::remove_all<RhsNested>::type _RhsNested;
+    typedef internal::blas_traits<_RhsNested> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType;
+    typedef typename internal::traits<Rhs>::Scalar RhsScalar;
+
+    // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
+    typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
+
+  public:
+
+    typedef typename Base::PlainObject PlainObject;
+
+    ProductBase(const Lhs& lhs, const Rhs& rhs)
+      : m_lhs(lhs), m_rhs(rhs)
+    {
+      eigen_assert(lhs.cols() == rhs.rows()
+        && "invalid matrix product"
+        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+    }
+
+    inline Index rows() const { return m_lhs.rows(); }
+    inline Index cols() const { return m_rhs.cols(); }
+
+    template<typename Dest>
+    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
+
+    template<typename Dest>
+    inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
+
+    template<typename Dest>
+    inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
+
+    template<typename Dest>
+    inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
+
+    const _LhsNested& lhs() const { return m_lhs; }
+    const _RhsNested& rhs() const { return m_rhs; }
+
+    // Implicit conversion to the nested type (trigger the evaluation of the product)
+    operator const PlainObject& () const
+    {
+      m_result.resize(m_lhs.rows(), m_rhs.cols());
+      derived().evalTo(m_result);
+      return m_result;
+    }
+
+    const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const
+    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
+
+    template<int Index>
+    const Diagonal<FullyLazyCoeffBaseProductType,Index> diagonal() const
+    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
+
+    const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
+    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
+
+    // restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isnt a Lvalue expression
+    typename Base::CoeffReturnType coeff(Index row, Index col) const
+    {
+#ifdef EIGEN2_SUPPORT
+      return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum();
+#else
+      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+      eigen_assert(this->rows() == 1 && this->cols() == 1);
+      Matrix<Scalar,1,1> result = *this;
+      return result.coeff(row,col);
+#endif
+    }
+
+    typename Base::CoeffReturnType coeff(Index i) const
+    {
+      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+      eigen_assert(this->rows() == 1 && this->cols() == 1);
+      Matrix<Scalar,1,1> result = *this;
+      return result.coeff(i);
+    }
+
+    const Scalar& coeffRef(Index row, Index col) const
+    {
+      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+      eigen_assert(this->rows() == 1 && this->cols() == 1);
+      return derived().coeffRef(row,col);
+    }
+
+    const Scalar& coeffRef(Index i) const
+    {
+      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+      eigen_assert(this->rows() == 1 && this->cols() == 1);
+      return derived().coeffRef(i);
+    }
+
+  protected:
+
+    LhsNested m_lhs;
+    RhsNested m_rhs;
+
+    mutable PlainObject m_result;
+};
+
+// here we need to overload the nested rule for products
+// such that the nested type is a const reference to a plain matrix
+namespace internal {
+template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
+struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
+{
+  typedef PlainObject const& type;
+};
+}
+
+template<typename NestedProduct>
+class ScaledProduct;
+
+// Note that these two operator* functions are not defined as member
+// functions of ProductBase, because, otherwise we would have to
+// define all overloads defined in MatrixBase. Furthermore, Using
+// "using Base::operator*" would not work with MSVC.
+//
+// Also note that here we accept any compatible scalar types
+template<typename Derived,typename Lhs,typename Rhs>
+const ScaledProduct<Derived>
+operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::Scalar x)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
+
+template<typename Derived,typename Lhs,typename Rhs>
+typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
+                      const ScaledProduct<Derived> >::type
+operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::RealScalar x)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
+
+
+template<typename Derived,typename Lhs,typename Rhs>
+const ScaledProduct<Derived>
+operator*(typename Derived::Scalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
+
+template<typename Derived,typename Lhs,typename Rhs>
+typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
+                      const ScaledProduct<Derived> >::type
+operator*(typename Derived::RealScalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
+{ return ScaledProduct<Derived>(prod.derived(), x); }
+
+namespace internal {
+template<typename NestedProduct>
+struct traits<ScaledProduct<NestedProduct> >
+ : traits<ProductBase<ScaledProduct<NestedProduct>,
+                         typename NestedProduct::_LhsNested,
+                         typename NestedProduct::_RhsNested> >
+{
+  typedef typename traits<NestedProduct>::StorageKind StorageKind;
+};
+}
+
+template<typename NestedProduct>
+class ScaledProduct
+  : public ProductBase<ScaledProduct<NestedProduct>,
+                       typename NestedProduct::_LhsNested,
+                       typename NestedProduct::_RhsNested>
+{
+  public:
+    typedef ProductBase<ScaledProduct<NestedProduct>,
+                       typename NestedProduct::_LhsNested,
+                       typename NestedProduct::_RhsNested> Base;
+    typedef typename Base::Scalar Scalar;
+    typedef typename Base::PlainObject PlainObject;
+//     EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
+
+    ScaledProduct(const NestedProduct& prod, Scalar x)
+    : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
+
+    template<typename Dest>
+    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
+
+    template<typename Dest>
+    inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
+
+    template<typename Dest>
+    inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
+
+    template<typename Dest>
+    inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { m_prod.derived().scaleAndAddTo(dst,alpha * m_alpha); }
+
+    const Scalar& alpha() const { return m_alpha; }
+    
+  protected:
+    const NestedProduct& m_prod;
+    Scalar m_alpha;
+};
+
+/** \internal
+  * Overloaded to perform an efficient C = (A*B).lazy() */
+template<typename Derived>
+template<typename ProductDerived, typename Lhs, typename Rhs>
+Derived& MatrixBase<Derived>::lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other)
+{
+  other.derived().evalTo(derived());
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCTBASE_H
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
--- a/Eigen/src/Core/Random.h
+++ b/Eigen/src/Core/Random.h
@@ -28,18 +28,12 @@ struct functor_traits<scalar_random_op<Scalar> >

 /** \returns a random matrix expression
  *
-  * Numbers are uniformly spread through their whole definition range for integer types,
-  * and in the [-1:1] range for floating point scalar types.
-  * 
  * The parameters \a rows and \a cols are the number of rows and of columns of
  * the returned matrix. Must be compatible with this MatrixBase type.
  *
-  * \not_reentrant
-  * 
  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
  * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
  * instead.
-  * 
  *
  * Example: \include MatrixBase_random_int_int.cpp
  * Output: \verbinclude MatrixBase_random_int_int.out
@@ -47,28 +41,22 @@ struct functor_traits<scalar_random_op<Scalar> >
  * This expression has the "evaluate before nesting" flag so that it will be evaluated into
  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
  * behavior with expressions involving random matrices.
-  * 
-  * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
  *
-  * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
+  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
  */
 template<typename Derived>
-inline const typename DenseBase<Derived>::RandomReturnType
+inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
 DenseBase<Derived>::Random(Index rows, Index cols)
 {
  return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
 }

 /** \returns a random vector expression
-  *
-  * Numbers are uniformly spread through their whole definition range for integer types,
-  * and in the [-1:1] range for floating point scalar types.
  *
  * The parameter \a size is the size of the returned vector.
  * Must be compatible with this MatrixBase type.
  *
  * \only_for_vectors
-  * \not_reentrant
  *
  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
  * it is redundant to pass \a size as argument, so Random() should be used
@@ -81,10 +69,10 @@ DenseBase<Derived>::Random(Index rows, Index cols)
  * a temporary vector whenever it is nested in a larger expression. This prevents unexpected
  * behavior with expressions involving random matrices.
  *
-  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
+  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
  */
 template<typename Derived>
-inline const typename DenseBase<Derived>::RandomReturnType
+inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
 DenseBase<Derived>::Random(Index size)
 {
  return NullaryExpr(size, internal::scalar_random_op<Scalar>());
@@ -92,9 +80,6 @@ DenseBase<Derived>::Random(Index size)

 /** \returns a fixed-size random matrix or vector expression
  *
-  * Numbers are uniformly spread through their whole definition range for integer types,
-  * and in the [-1:1] range for floating point scalar types.
-  * 
  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
  * need to use the variants taking size arguments.
  *
@@ -104,13 +89,11 @@ DenseBase<Derived>::Random(Index size)
  * This expression has the "evaluate before nesting" flag so that it will be evaluated into
  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
  * behavior with expressions involving random matrices.
-  * 
-  * \not_reentrant
  *
-  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
+  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
  */
 template<typename Derived>
-inline const typename DenseBase<Derived>::RandomReturnType
+inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
 DenseBase<Derived>::Random()
 {
  return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
@@ -118,11 +101,6 @@ DenseBase<Derived>::Random()

 /** Sets all coefficients in this expression to random values.
  *
-  * Numbers are uniformly spread through their whole definition range for integer types,
-  * and in the [-1:1] range for floating point scalar types.
-  * 
-  * \not_reentrant
-  * 
  * Example: \include MatrixBase_setRandom.cpp
  * Output: \verbinclude MatrixBase_setRandom.out
  *
@@ -134,41 +112,32 @@ inline Derived& DenseBase<Derived>::setRandom()
  return *this = Random(rows(), cols());
 }

-/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
+/** Resizes to the given \a size, and sets all coefficients in this expression to random values.
  *
-  * Numbers are uniformly spread through their whole definition range for integer types,
-  * and in the [-1:1] range for floating point scalar types.
-  * 
  * \only_for_vectors
-  * \not_reentrant
  *
  * Example: \include Matrix_setRandom_int.cpp
  * Output: \verbinclude Matrix_setRandom_int.out
  *
-  * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
+  * \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
-PlainObjectBase<Derived>::setRandom(Index newSize)
+PlainObjectBase<Derived>::setRandom(Index size)
 {
-  resize(newSize);
+  resize(size);
  return setRandom();
 }

 /** Resizes to the given size, and sets all coefficients in this expression to random values.
  *
-  * Numbers are uniformly spread through their whole definition range for integer types,
-  * and in the [-1:1] range for floating point scalar types.
-  *
-  * \not_reentrant
-  * 
  * \param rows the new number of rows
  * \param cols the new number of columns
  *
  * Example: \include Matrix_setRandom_int_int.cpp
  * Output: \verbinclude Matrix_setRandom_int_int.out
  *
-  * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
+  * \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -65,25 +65,6 @@ public:
              ? CompleteUnrolling
              : NoUnrolling
  };
-  
-#ifdef EIGEN_DEBUG_ASSIGN
-  static void debug()
-  {
-    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
-    std::cerr.setf(std::ios::hex, std::ios::basefield);
-    EIGEN_DEBUG_VAR(Derived::Flags)
-    std::cerr.unsetf(std::ios::hex);
-    EIGEN_DEBUG_VAR(InnerMaxSize)
-    EIGEN_DEBUG_VAR(PacketSize)
-    EIGEN_DEBUG_VAR(MightVectorize)
-    EIGEN_DEBUG_VAR(MayLinearVectorize)
-    EIGEN_DEBUG_VAR(MaySliceVectorize)
-    EIGEN_DEBUG_VAR(Traversal)
-    EIGEN_DEBUG_VAR(UnrollingLimit)
-    EIGEN_DEBUG_VAR(Unrolling)
-    std::cerr << std::endl;
-  }
-#endif
 };

 /***************************************************************************
@@ -101,7 +82,6 @@ struct redux_novec_unroller

  typedef typename Derived::Scalar Scalar;

-  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@@ -119,7 +99,6 @@ struct redux_novec_unroller<Func, Derived, Start, 1>

  typedef typename Derived::Scalar Scalar;

-  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
@@ -133,7 +112,6 @@ template<typename Func, typename Derived, int Start>
 struct redux_novec_unroller<Func, Derived, Start, 0>
 {
  typedef typename Derived::Scalar Scalar;
-  EIGEN_DEVICE_FUNC 
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
 };

@@ -165,7 +143,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
    index = Start * packet_traits<typename Derived::Scalar>::size,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
-    alignment = Derived::Alignment
+    alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
  };

  typedef typename Derived::Scalar Scalar;
@@ -173,7 +151,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
-    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
+    return mat.template packetByOuterInner<alignment>(outer, inner);
  }
 };

@@ -191,8 +169,8 @@ template<typename Func, typename Derived>
 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
 {
  typedef typename Derived::Scalar Scalar;
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+  typedef typename Derived::Index Index;
+  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
@@ -216,18 +194,18 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
 {
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;
+  typedef typename Derived::Index Index;

-  static Scalar run(const Derived &mat, const Func& func)
+  static Scalar run(const Derived& mat, const Func& func)
  {
    const Index size = mat.size();
-    
+    eigen_assert(size && "you are using an empty matrix");
    const Index packetSize = packet_traits<Scalar>::size;
-    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
+    const Index alignedStart = internal::first_aligned(mat);
    enum {
-      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
-      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
+      alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
+                ? Aligned : Unaligned
    };
-    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
@@ -235,19 +213,19 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
    Scalar res;
    if(alignedSize)
    {
-      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
+      PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
-        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
+        PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
-          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
-          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
+          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
-          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
      }
      res = func.predux(packet_res0);

@@ -273,9 +251,10 @@ template<typename Func, typename Derived>
 struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
 {
  typedef typename Derived::Scalar Scalar;
-  typedef typename packet_traits<Scalar>::type PacketType;
+  typedef typename packet_traits<Scalar>::type PacketScalar;
+  typedef typename Derived::Index Index;

-  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
+  static Scalar run(const Derived& mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
@@ -287,10 +266,10 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
    Scalar res;
    if(packetedInnerSize)
    {
-      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
+      PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
-          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
+          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));

      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
@@ -317,83 +296,16 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
-  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
-    if (VectorizedSize > 0) {
-      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
-      if (VectorizedSize != Size)
-        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
-      return res;
-    }
-    else {
-      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
-    }
+    Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
+    if (VectorizedSize != Size)
+      res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
+    return res;
  }
 };

-// evaluator adaptor
-template<typename _XprType>
-class redux_evaluator
-{
-public:
-  typedef _XprType XprType;
-  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
-  
-  typedef typename XprType::Scalar Scalar;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketScalar PacketScalar;
-  typedef typename XprType::PacketReturnType PacketReturnType;
-  
-  enum {
-    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
-    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
-    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
-    IsRowMajor = XprType::IsRowMajor,
-    SizeAtCompileTime = XprType::SizeAtCompileTime,
-    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
-    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
-    Alignment = evaluator<XprType>::Alignment
-  };
-  
-  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
-  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
-  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
-  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
-  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
-
-  EIGEN_DEVICE_FUNC
-  CoeffReturnType coeff(Index row, Index col) const
-  { return m_evaluator.coeff(row, col); }
-
-  EIGEN_DEVICE_FUNC
-  CoeffReturnType coeff(Index index) const
-  { return m_evaluator.coeff(index); }
-
-  template<int LoadMode, typename PacketType>
-  PacketReturnType packet(Index row, Index col) const
-  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
-
-  template<int LoadMode, typename PacketType>
-  PacketReturnType packet(Index index) const
-  { return m_evaluator.template packet<LoadMode,PacketType>(index); }
-  
-  EIGEN_DEVICE_FUNC
-  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
-  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
-  
-  template<int LoadMode, typename PacketType>
-  PacketReturnType packetByOuterInner(Index outer, Index inner) const
-  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
-  
-  const XprType & nestedExpression() const { return m_xpr; }
-  
-protected:
-  internal::evaluator<XprType> m_evaluator;
-  const XprType &m_xpr;
-};
-
 } // end namespace internal

 /***************************************************************************
@@ -404,51 +316,36 @@ protected:
 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a BinaryOp is the type of the functor \a func which must be
-  * an associative operator. Both current C++98 and C++11 functor styles are handled.
+  * an associative operator. Both current STL and TR1 functor styles are handled.
  *
  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
  */
 template<typename Derived>
 template<typename Func>
-typename internal::traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
 DenseBase<Derived>::redux(const Func& func) const
 {
-  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
-  
-  // FIXME, eval_nest should be handled by redux_evaluator, however:
-  //  - it is currently difficult to provide the right Flags since they are still handled by the expressions
-  //  - handling it here might reduce the number of template instantiations
-//   typedef typename internal::nested_eval<Derived,1>::type ThisNested;
-//   typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned;
-//   typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator;
-//   
-//   ThisNested thisNested(derived());
-//   ThisEvaluator thisEval(thisNested);
-  
-  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
-  ThisEvaluator thisEval(derived());
-  
-  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
+  typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
+  return internal::redux_impl<Func, ThisNested>
+            ::run(derived(), func);
 }

-/** \returns the minimum of all coefficients of \c *this.
-  * \warning the result is undefined if \c *this contains NaN.
+/** \returns the minimum of all coefficients of *this
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::minCoeff() const
 {
-  return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
+  return this->redux(Eigen::internal::scalar_min_op<Scalar>());
 }

-/** \returns the maximum of all coefficients of \c *this.
-  * \warning the result is undefined if \c *this contains NaN.
+/** \returns the maximum of all coefficients of *this
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::maxCoeff() const
 {
-  return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
+  return this->redux(Eigen::internal::scalar_max_op<Scalar>());
 }

 /** \returns the sum of all coefficients of *this
@@ -461,7 +358,7 @@ DenseBase<Derived>::sum() const
 {
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
-  return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
+  return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
 }

 /** \returns the mean of all coefficients of *this
@@ -472,7 +369,7 @@ template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::mean() const
 {
-  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
+  return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
 }

 /** \returns the product of all coefficients of *this
@@ -488,7 +385,7 @@ DenseBase<Derived>::prod() const
 {
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
-  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
+  return this->redux(Eigen::internal::scalar_product_op<Scalar>());
 }

 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -1,276 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_REF_H
-#define EIGEN_REF_H
-
-namespace Eigen { 
-
-/** \class Ref
-  * \ingroup Core_Module
-  *
-  * \brief A matrix or vector expression mapping an existing expression
-  *
-  * \tparam PlainObjectType the equivalent matrix type of the mapped data
-  * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
-  *                The default is \c #Unaligned.
-  * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
-  *                   but accepts a variable outer stride (leading dimension).
-  *                   This can be overridden by specifying strides.
-  *                   The type passed here must be a specialization of the Stride template, see examples below.
-  *
-  * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
-  * A Ref<> object can represent either a const expression or a l-value:
-  * \code
-  * // in-out argument:
-  * void foo1(Ref<VectorXf> x);
-  *
-  * // read-only const argument:
-  * void foo2(const Ref<const VectorXf>& x);
-  * \endcode
-  *
-  * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
-  * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
-  * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
-  * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
-  * can be greater than the number of rows.
-  *
-  * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
-  * Here are some examples:
-  * \code
-  * MatrixXf A;
-  * VectorXf a;
-  * foo1(a.head());             // OK
-  * foo1(A.col());              // OK
-  * foo1(A.row());              // Compilation error because here innerstride!=1
-  * foo2(A.row());              // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
-  * foo2(A.row().transpose());  // The row is copied into a contiguous temporary
-  * foo2(2*a);                  // The expression is evaluated into a temporary
-  * foo2(A.col().segment(2,4)); // No temporary
-  * \endcode
-  *
-  * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
-  * Here is an example accepting an innerstride!=1:
-  * \code
-  * // in-out argument:
-  * void foo3(Ref<VectorXf,0,InnerStride<> > x);
-  * foo3(A.row());              // OK
-  * \endcode
-  * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
-  * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
-  * template function, e.g.:
-  * \code
-  * // in the .h:
-  * void foo(const Ref<MatrixXf>& A);
-  * void foo(const Ref<MatrixXf,0,Stride<> >& A);
-  *
-  * // in the .cpp:
-  * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
-  *     ... // crazy code goes here
-  * }
-  * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
-  * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
-  * \endcode
-  *
-  *
-  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
-  */
-
-namespace internal {
-
-template<typename _PlainObjectType, int _Options, typename _StrideType>
-struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
-  : public traits<Map<_PlainObjectType, _Options, _StrideType> >
-{
-  typedef _PlainObjectType PlainObjectType;
-  typedef _StrideType StrideType;
-  enum {
-    Options = _Options,
-    Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
-    Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
-  };
-
-  template<typename Derived> struct match {
-    enum {
-      HasDirectAccess = internal::has_direct_access<Derived>::ret,
-      StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
-      InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)
-                      || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)
-                      || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
-      OuterStrideMatch = Derived::IsVectorAtCompileTime
-                      || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
-      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
-      ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
-      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
-    };
-    typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
-  };
-  
-};
-
-template<typename Derived>
-struct traits<RefBase<Derived> > : public traits<Derived> {};
-
-}
-
-template<typename Derived> class RefBase
- : public MapBase<Derived>
-{
-  typedef typename internal::traits<Derived>::PlainObjectType PlainObjectType;
-  typedef typename internal::traits<Derived>::StrideType StrideType;
-
-public:
-
-  typedef MapBase<Derived> Base;
-  EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
-
-  EIGEN_DEVICE_FUNC inline Index innerStride() const
-  {
-    return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
-  }
-
-  EIGEN_DEVICE_FUNC inline Index outerStride() const
-  {
-    return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
-         : IsVectorAtCompileTime ? this->size()
-         : int(Flags)&RowMajorBit ? this->cols()
-         : this->rows();
-  }
-
-  EIGEN_DEVICE_FUNC RefBase()
-    : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),
-      // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values:
-      m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
-               StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
-  {}
-  
-  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)
-
-protected:
-
-  typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
-
-  template<typename Expression>
-  EIGEN_DEVICE_FUNC void construct(Expression& expr)
-  {
-    if(PlainObjectType::RowsAtCompileTime==1)
-    {
-      eigen_assert(expr.rows()==1 || expr.cols()==1);
-      ::new (static_cast<Base*>(this)) Base(expr.data(), 1, expr.size());
-    }
-    else if(PlainObjectType::ColsAtCompileTime==1)
-    {
-      eigen_assert(expr.rows()==1 || expr.cols()==1);
-      ::new (static_cast<Base*>(this)) Base(expr.data(), expr.size(), 1);
-    }
-    else
-      ::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
-    
-    if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit)))
-      ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1);
-    else
-      ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
-                                   StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());    
-  }
-
-  StrideBase m_stride;
-};
-
-
-template<typename PlainObjectType, int Options, typename StrideType> class Ref
-  : public RefBase<Ref<PlainObjectType, Options, StrideType> >
-{
-  private:
-    typedef internal::traits<Ref> Traits;
-    template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
-  public:
-
-    typedef RefBase<Ref> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
-
-
-    #ifndef EIGEN_PARSED_BY_DOXYGEN
-    template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
-    {
-      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
-      Base::construct(expr.derived());
-    }
-    template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
-    #else
-    template<typename Derived>
-    inline Ref(DenseBase<Derived>& expr)
-    #endif
-    {
-      EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
-      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
-      EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
-      Base::construct(expr.const_cast_derived());
-    }
-
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref)
-
-};
-
-// this is the const ref version
-template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
-  : public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
-{
-    typedef internal::traits<Ref> Traits;
-  public:
-
-    typedef RefBase<Ref> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
-
-    template<typename Derived>
-    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
-                                 typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
-    {
-//      std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
-//      std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
-//      std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n";
-      construct(expr.derived(), typename Traits::template match<Derived>::type());
-    }
-
-    EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) {
-      // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
-    }
-
-    template<typename OtherRef>
-    EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) {
-      construct(other.derived(), typename Traits::template match<OtherRef>::type());
-    }
-
-  protected:
-
-    template<typename Expression>
-    EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
-    {
-      Base::construct(expr);
-    }
-
-    template<typename Expression>
-    EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
-    {
-      internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>());
-      Base::construct(m_object);
-    }
-
-  protected:
-    TPlainObjectType m_object;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_REF_H
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -35,7 +35,10 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
  typedef typename MatrixType::Scalar Scalar;
  typedef typename traits<MatrixType>::StorageKind StorageKind;
  typedef typename traits<MatrixType>::XprKind XprKind;
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  enum {
+    Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
+  };
+  typedef typename nested<MatrixType,Factor>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
@@ -50,9 +53,8 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
    IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
               : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
               : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
-    
-    // FIXME enable DirectAccess with negative strides?
-    Flags = IsRowMajor ? RowMajorBit : 0
+    Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0),
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
  };
 };
 }
@@ -66,10 +68,8 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate

    typedef typename internal::dense_xpr_base<Replicate>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
-    typedef typename internal::remove_all<MatrixType>::type NestedExpression;

    template<typename OriginalMatrixType>
-    EIGEN_DEVICE_FUNC
    inline explicit Replicate(const OriginalMatrixType& matrix)
      : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
    {
@@ -79,7 +79,6 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
    }

    template<typename OriginalMatrixType>
-    EIGEN_DEVICE_FUNC
    inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
      : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
    {
@@ -87,12 +86,34 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
                          THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
    }

-    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
-    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }

-    EIGEN_DEVICE_FUNC
+    inline Scalar coeff(Index row, Index col) const
+    {
+      // try to avoid using modulo; this is a pure optimization strategy
+      const Index actual_row  = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
+                            : RowFactor==1 ? row
+                            : row%m_matrix.rows();
+      const Index actual_col  = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
+                            : ColFactor==1 ? col
+                            : col%m_matrix.cols();
+
+      return m_matrix.coeff(actual_row, actual_col);
+    }
+    template<int LoadMode>
+    inline PacketScalar packet(Index row, Index col) const
+    {
+      const Index actual_row  = internal::traits<MatrixType>::RowsAtCompileTime==1 ? 0
+                            : RowFactor==1 ? row
+                            : row%m_matrix.rows();
+      const Index actual_col  = internal::traits<MatrixType>::ColsAtCompileTime==1 ? 0
+                            : ColFactor==1 ? col
+                            : col%m_matrix.cols();
+
+      return m_matrix.template packet<LoadMode>(actual_row, actual_col);
+    }
+
    const _MatrixTypeNested& nestedExpression() const
    { 
      return m_matrix; 
@@ -114,12 +135,27 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
  */
 template<typename Derived>
 template<int RowFactor, int ColFactor>
-const Replicate<Derived,RowFactor,ColFactor>
+inline const Replicate<Derived,RowFactor,ColFactor>
 DenseBase<Derived>::replicate() const
 {
  return Replicate<Derived,RowFactor,ColFactor>(derived());
 }

+/**
+  * \return an expression of the replication of \c *this
+  *
+  * Example: \include MatrixBase_replicate_int_int.cpp
+  * Output: \verbinclude MatrixBase_replicate_int_int.out
+  *
+  * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
+  */
+template<typename Derived>
+inline const Replicate<Derived,Dynamic,Dynamic>
+DenseBase<Derived>::replicate(Index rowFactor,Index colFactor) const
+{
+  return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
+}
+
 /**
  * \return an expression of the replication of each column (or row) of \c *this
  *
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -38,10 +38,9 @@ struct traits<ReturnByValue<Derived> >
 * So internal::nested always gives the plain return matrix type.
 *
 * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
- * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
 */
 template<typename Derived,int n,typename PlainObject>
-struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
+struct nested<ReturnByValue<Derived>, n, PlainObject>
 {
  typedef typename traits<Derived>::ReturnType type;
 };
@@ -49,7 +48,7 @@ struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
 } // end namespace internal

 template<typename Derived> class ReturnByValue
-  : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
+  : public internal::dense_xpr_base< ReturnByValue<Derived> >::type
 {
  public:
    typedef typename internal::traits<Derived>::ReturnType ReturnType;
@@ -58,11 +57,10 @@ template<typename Derived> class ReturnByValue
    EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)

    template<typename Dest>
-    EIGEN_DEVICE_FUNC
    inline void evalTo(Dest& dst) const
    { static_cast<const Derived*>(this)->evalTo(dst); }
-    EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
+    inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
+    inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
 #define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
@@ -74,7 +72,6 @@ template<typename Derived> class ReturnByValue
    const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
    Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
    Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
-#undef Unusable
 #endif
 };

@@ -86,33 +83,6 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
  return derived();
 }

-namespace internal {
-
-// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
-// when a ReturnByValue expression is assigned, the evaluator is not constructed.
-// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
-  
-template<typename Derived>
-struct evaluator<ReturnByValue<Derived> >
-  : public evaluator<typename internal::traits<Derived>::ReturnType>
-{
-  typedef ReturnByValue<Derived> XprType;
-  typedef typename internal::traits<Derived>::ReturnType PlainObject;
-  typedef evaluator<PlainObject> Base;
-  
-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
-    : m_result(xpr.rows(), xpr.cols())
-  {
-    ::new (static_cast<Base*>(this)) Base(m_result);
-    xpr.evalTo(m_result);
-  }
-
-protected:
-  PlainObject m_result;
-};
-
-} // end namespace internal
-
 } // end namespace Eigen

 #endif // EIGEN_RETURNBYVALUE_H
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -37,25 +37,32 @@ struct traits<Reverse<MatrixType, Direction> >
  typedef typename MatrixType::Scalar Scalar;
  typedef typename traits<MatrixType>::StorageKind StorageKind;
  typedef typename traits<MatrixType>::XprKind XprKind;
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename nested<MatrixType>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
+
+    // let's enable LinearAccess only with vectorization because of the product overhead
+    LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
+                 ? LinearAccessBit : 0,
+
+    Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
+
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
  };
 };

-template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
+template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond
 {
-  static inline PacketType run(const PacketType& x) { return preverse(x); }
+  static inline PacketScalar run(const PacketScalar& x) { return preverse(x); }
 };

-template<typename PacketType> struct reverse_packet_cond<PacketType,false>
+template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false>
 {
-  static inline PacketType run(const PacketType& x) { return x; }
+  static inline PacketScalar run(const PacketScalar& x) { return x; }
 };

 } // end namespace internal 
@@ -67,9 +74,12 @@ template<typename MatrixType, int Direction> class Reverse

    typedef typename internal::dense_xpr_base<Reverse>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
-    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
    using Base::IsRowMajor;

+    // next line is necessary because otherwise const version of operator()
+    // is hidden by non-const version defined in this file
+    using Base::operator(); 
+
  protected:
    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
@@ -85,19 +95,82 @@ template<typename MatrixType, int Direction> class Reverse
    typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
  public:

-    EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
+    inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
+    inline Index rows() const { return m_matrix.rows(); }
+    inline Index cols() const { return m_matrix.cols(); }

-    EIGEN_DEVICE_FUNC inline Index innerStride() const
+    inline Index innerStride() const
    {
      return -m_matrix.innerStride();
    }

-    EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
+    inline Scalar& operator()(Index row, Index col)
+    {
+      eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+      return coeffRef(row, col);
+    }
+
+    inline Scalar& coeffRef(Index row, Index col)
+    {
+      return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
+                                                    ReverseCol ? m_matrix.cols() - col - 1 : col);
+    }
+
+    inline CoeffReturnType coeff(Index row, Index col) const
+    {
+      return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
+                            ReverseCol ? m_matrix.cols() - col - 1 : col);
+    }
+
+    inline CoeffReturnType coeff(Index index) const
+    {
+      return m_matrix.coeff(m_matrix.size() - index - 1);
+    }
+
+    inline Scalar& coeffRef(Index index)
+    {
+      return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
+    }
+
+    inline Scalar& operator()(Index index)
+    {
+      eigen_assert(index >= 0 && index < m_matrix.size());
+      return coeffRef(index);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index row, Index col) const
+    {
+      return reverse_packet::run(m_matrix.template packet<LoadMode>(
+                                    ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
+                                    ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
+    {
+      m_matrix.const_cast_derived().template writePacket<LoadMode>(
+                                      ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
+                                      ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
+                                      reverse_packet::run(x));
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index index) const
+    {
+      return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index index, const PacketScalar& x)
+    {
+      m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
+    }
+
+    const typename internal::remove_all<typename MatrixType::Nested>::type& 
    nestedExpression() const 
    {
      return m_matrix;
@@ -117,93 +190,33 @@ template<typename Derived>
 inline typename DenseBase<Derived>::ReverseReturnType
 DenseBase<Derived>::reverse()
 {
-  return ReverseReturnType(derived());
+  return derived();
 }

-
-//reverse const overload moved DenseBase.h due to a CUDA compiler bug
+/** This is the const version of reverse(). */
+template<typename Derived>
+inline const typename DenseBase<Derived>::ConstReverseReturnType
+DenseBase<Derived>::reverse() const
+{
+  return derived();
+}

 /** This is the "in place" version of reverse: it reverses \c *this.
  *
  * In most cases it is probably better to simply use the reversed expression
  * of a matrix. However, when reversing the matrix data itself is really needed,
  * then this "in-place" version is probably the right choice because it provides
-  * the following additional benefits:
+  * the following additional features:
  *  - less error prone: doing the same operation with .reverse() requires special care:
  *    \code m = m.reverse().eval(); \endcode
-  *  - this API enables reverse operations without the need for a temporary
+  *  - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
  *  - it allows future optimizations (cache friendliness, etc.)
  *
-  * \sa VectorwiseOp::reverseInPlace(), reverse() */
+  * \sa reverse() */
 template<typename Derived>
 inline void DenseBase<Derived>::reverseInPlace()
 {
-  if(cols()>rows())
-  {
-    Index half = cols()/2;
-    leftCols(half).swap(rightCols(half).reverse());
-    if((cols()%2)==1)
-    {
-      Index half2 = rows()/2;
-      col(half).head(half2).swap(col(half).tail(half2).reverse());
-    }
-  }
-  else
-  {
-    Index half = rows()/2;
-    topRows(half).swap(bottomRows(half).reverse());
-    if((rows()%2)==1)
-    {
-      Index half2 = cols()/2;
-      row(half).head(half2).swap(row(half).tail(half2).reverse());
-    }
-  }
-}
-
-namespace internal {
-  
-template<int Direction>
-struct vectorwise_reverse_inplace_impl;
-
-template<>
-struct vectorwise_reverse_inplace_impl<Vertical>
-{
-  template<typename ExpressionType>
-  static void run(ExpressionType &xpr)
-  {
-    Index half = xpr.rows()/2;
-    xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
-  }
-};
-
-template<>
-struct vectorwise_reverse_inplace_impl<Horizontal>
-{
-  template<typename ExpressionType>
-  static void run(ExpressionType &xpr)
-  {
-    Index half = xpr.cols()/2;
-    xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
-  }
-};
-
-} // end namespace internal
-
-/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
-  *
-  * In most cases it is probably better to simply use the reversed expression
-  * of a matrix. However, when reversing the matrix data itself is really needed,
-  * then this "in-place" version is probably the right choice because it provides
-  * the following additional benefits:
-  *  - less error prone: doing the same operation with .reverse() requires special care:
-  *    \code m = m.reverse().eval(); \endcode
-  *  - this API enables reverse operations without the need for a temporary
-  *
-  * \sa DenseBase::reverseInPlace(), reverse() */
-template<typename ExpressionType, int Direction>
-void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
-{
-  internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
+  derived() = derived().reverse().eval();
 }

 } // end namespace Eigen
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -43,34 +43,35 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
    ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
    MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
-    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
+    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
+    CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
+                  + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
+                                   traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
  };
 };
 }

 template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
-class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type,
-               internal::no_assignment_operator
+class Select : internal::no_assignment_operator,
+  public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type
 {
  public:

    typedef typename internal::dense_xpr_base<Select>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Select)

-    inline EIGEN_DEVICE_FUNC
-    Select(const ConditionMatrixType& a_conditionMatrix,
-           const ThenMatrixType& a_thenMatrix,
-           const ElseMatrixType& a_elseMatrix)
-      : m_condition(a_conditionMatrix), m_then(a_thenMatrix), m_else(a_elseMatrix)
+    Select(const ConditionMatrixType& conditionMatrix,
+           const ThenMatrixType& thenMatrix,
+           const ElseMatrixType& elseMatrix)
+      : m_condition(conditionMatrix), m_then(thenMatrix), m_else(elseMatrix)
    {
      eigen_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());
      eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
    }

-    inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); }
-    inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); }
+    Index rows() const { return m_condition.rows(); }
+    Index cols() const { return m_condition.cols(); }

-    inline EIGEN_DEVICE_FUNC
    const Scalar coeff(Index i, Index j) const
    {
      if (m_condition.coeff(i,j))
@@ -79,7 +80,6 @@ class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, Then
        return m_else.coeff(i,j);
    }

-    inline EIGEN_DEVICE_FUNC
    const Scalar coeff(Index i) const
    {
      if (m_condition.coeff(i))
@@ -88,17 +88,17 @@ class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, Then
        return m_else.coeff(i);
    }

-    inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const
+    const ConditionMatrixType& conditionMatrix() const
    {
      return m_condition;
    }

-    inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const
+    const ThenMatrixType& thenMatrix() const
    {
      return m_then;
    }

-    inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const
+    const ElseMatrixType& elseMatrix() const
    {
      return m_else;
    }
@@ -136,7 +136,7 @@ template<typename Derived>
 template<typename ThenDerived>
 inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
 DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
-                           const typename ThenDerived::Scalar& elseScalar) const
+                            typename ThenDerived::Scalar elseScalar) const
 {
  return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
    derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));
@@ -150,8 +150,8 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
 template<typename Derived>
 template<typename ElseDerived>
 inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
-DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
-                           const DenseBase<ElseDerived>& elseMatrix) const
+DenseBase<Derived>::select(typename ElseDerived::Scalar thenScalar,
+                            const DenseBase<ElseDerived>& elseMatrix) const
 {
  return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
    derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -32,57 +32,54 @@ namespace internal {
 template<typename MatrixType, unsigned int UpLo>
 struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
 {
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename nested<MatrixType>::type MatrixTypeNested;
  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
  typedef MatrixType ExpressionType;
-  typedef typename MatrixType::PlainObject FullMatrixType;
+  typedef typename MatrixType::PlainObject DenseMatrixType;
  enum {
    Mode = UpLo | SelfAdjoint,
-    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags =  MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit)
-           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved
+    Flags =  MatrixTypeNestedCleaned::Flags & (HereditaryBits)
+           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved
+    CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
  };
 };
 }

+template <typename Lhs, int LhsMode, bool LhsIsVector,
+          typename Rhs, int RhsMode, bool RhsIsVector>
+struct SelfadjointProductMatrix;
+
 // FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
-template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
-  : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
+template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
+  : public TriangularBase<SelfAdjointView<MatrixType, UpLo> >
 {
  public:

-    typedef _MatrixType MatrixType;
    typedef TriangularBase<SelfAdjointView> Base;
    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;

    /** \brief The type of coefficients in this matrix */
    typedef typename internal::traits<SelfAdjointView>::Scalar Scalar; 
-    typedef typename MatrixType::StorageIndex StorageIndex;
+
+    typedef typename MatrixType::Index Index;

    enum {
-      Mode = internal::traits<SelfAdjointView>::Mode,
-      Flags = internal::traits<SelfAdjointView>::Flags
+      Mode = internal::traits<SelfAdjointView>::Mode
    };
    typedef typename MatrixType::PlainObject PlainObject;

-    EIGEN_DEVICE_FUNC
-    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
+    inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
    {}

-    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_matrix.rows(); }
-    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_matrix.cols(); }
-    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return m_matrix.outerStride(); }
-    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return m_matrix.innerStride(); }

    /** \sa MatrixBase::coeff()
      * \warning the coordinates must fit into the referenced triangular part
      */
-    EIGEN_DEVICE_FUNC
    inline Scalar coeff(Index row, Index col) const
    {
      Base::check_coordinates_internal(row, col);
@@ -92,46 +89,36 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
    /** \sa MatrixBase::coeffRef()
      * \warning the coordinates must fit into the referenced triangular part
      */
-    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index row, Index col)
    {
-      EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
      Base::check_coordinates_internal(row, col);
      return m_matrix.const_cast_derived().coeffRef(row, col);
    }

    /** \internal */
-    EIGEN_DEVICE_FUNC
    const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }

-    EIGEN_DEVICE_FUNC
    const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
-    EIGEN_DEVICE_FUNC
    MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }

-    /** Efficient triangular matrix times vector/matrix product */
+    /** Efficient self-adjoint matrix times vector/matrix product */
    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    const Product<SelfAdjointView,OtherDerived>
+    SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
    operator*(const MatrixBase<OtherDerived>& rhs) const
    {
-      return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());
+      return SelfadjointProductMatrix
+              <MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
+              (m_matrix, rhs.derived());
    }

-    /** Efficient vector/matrix times triangular matrix product */
+    /** Efficient vector/matrix times self-adjoint matrix product */
    template<typename OtherDerived> friend
-    EIGEN_DEVICE_FUNC
-    const Product<OtherDerived,SelfAdjointView>
+    SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
    operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
    {
-      return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
-    }
-    
-    friend EIGEN_DEVICE_FUNC
-    const SelfAdjointView<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,MatrixType>,UpLo>
-    operator*(const Scalar& s, const SelfAdjointView& mat)
-    {
-      return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
+      return SelfadjointProductMatrix
+              <OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
+              (lhs.derived(),rhs.m_matrix);
    }

    /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
@@ -145,8 +132,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
      * \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
      */
    template<typename DerivedU, typename DerivedV>
-    EIGEN_DEVICE_FUNC
-    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
+    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha = Scalar(1));

    /** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
      * \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.
@@ -159,8 +145,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
      * \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
      */
    template<typename DerivedU>
-    EIGEN_DEVICE_FUNC
-    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
+    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha = Scalar(1));

 /////////// Cholesky module ///////////

@@ -174,10 +159,31 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
    /** Return type of eigenvalues() */
    typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;

-    EIGEN_DEVICE_FUNC
    EigenvaluesReturnType eigenvalues() const;
-    EIGEN_DEVICE_FUNC
    RealScalar operatorNorm() const;
+    
+    #ifdef EIGEN2_SUPPORT
+    template<typename OtherDerived>
+    SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
+    {
+      enum {
+        OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
+      };
+      m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
+      m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
+      return *this;
+    }
+    template<typename OtherMatrixType, unsigned int OtherMode>
+    SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
+    {
+      enum {
+        OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
+      };
+      m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
+      m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
+      return *this;
+    }
+    #endif

  protected:
    MatrixTypeNested m_matrix;
@@ -195,56 +201,90 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView

 namespace internal {

-// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
-//      in the future selfadjoint-ness should be defined by the expression traits
-//      such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
-template<typename MatrixType, unsigned int Mode>
-struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
+template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
 {
-  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
-  typedef SelfAdjointShape Shape;
-  
-  static const int AssumeAliasing = 0;
+  enum {
+    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
+    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
+  };
+
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
+
+    if(row == col)
+      dst.coeffRef(row, col) = real(src.coeff(row, col));
+    else if(row < col)
+      dst.coeffRef(col, row) = conj(dst.coeffRef(row, col) = src.coeff(row, col));
+  }
 };

-template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
-class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>
-  : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
 {
-protected:
-  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
-  typedef typename Base::DstXprType DstXprType;
-  typedef typename Base::SrcXprType SrcXprType;
-  using Base::m_dst;
-  using Base::m_src;
-  using Base::m_functor;
-public:
-  
-  typedef typename Base::DstEvaluatorType DstEvaluatorType;
-  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
-  typedef typename Base::Scalar Scalar;
-  typedef typename Base::AssignmentTraits AssignmentTraits;
-  
-  
-  EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
-    : Base(dst, src, func, dstExpr)
-  {}
-  
-  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
+  static inline void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
+{
+  enum {
+    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
+    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
+  };
+
+  static inline void run(Derived1 &dst, const Derived2 &src)
  {
-    eigen_internal_assert(row!=col);
-    Scalar tmp = m_src.coeff(row,col);
-    m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
-    m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
+    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
+
+    if(row == col)
+      dst.coeffRef(row, col) = real(src.coeff(row, col));
+    else if(row > col)
+      dst.coeffRef(col, row) = conj(dst.coeffRef(row, col) = src.coeff(row, col));
  }
-  
-  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
+{
+  static inline void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
  {
-    Base::assignCoeff(id,id);
+    for(Index j = 0; j < dst.cols(); ++j)
+    {
+      for(Index i = 0; i < j; ++i)
+      {
+        dst.copyCoeff(i, j, src);
+        dst.coeffRef(j,i) = conj(dst.coeff(i,j));
+      }
+      dst.copyCoeff(j, j, src);
+    }
+  }
+};
+
+template<typename Derived1, typename Derived2, bool ClearOpposite>
+struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
+{
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+  typedef typename Derived1::Index Index;
+    for(Index i = 0; i < dst.rows(); ++i)
+    {
+      for(Index j = 0; j < i; ++j)
+      {
+        dst.copyCoeff(i, j, src);
+        dst.coeffRef(j,i) = conj(dst.coeff(i,j));
+      }
+      dst.copyCoeff(i, i, src);
+    }
  }
-  
-  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)
-  { eigen_internal_assert(false && "should never be called"); }
 };

 } // end namespace internal
@@ -258,7 +298,7 @@ template<unsigned int UpLo>
 typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
 MatrixBase<Derived>::selfadjointView() const
 {
-  return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
+  return derived();
 }

 template<typename Derived>
@@ -266,7 +306,7 @@ template<unsigned int UpLo>
 typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
 MatrixBase<Derived>::selfadjointView()
 {
-  return typename SelfAdjointViewReturnType<UpLo>::Type(derived());
+  return derived();
 }

 } // end namespace Eigen
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -12,35 +12,180 @@

 namespace Eigen { 

+/** \class SelfCwiseBinaryOp
+  * \ingroup Core_Module
+  *
+  * \internal
+  *
+  * \brief Internal helper class for optimizing operators like +=, -=
+  *
+  * This is a pseudo expression class re-implementing the copyCoeff/copyPacket
+  * method to directly performs a +=/-= operations in an optimal way. In particular,
+  * this allows to make sure that the input/output data are loaded only once using
+  * aligned packet loads.
+  *
+  * \sa class SwapWrapper for a similar trick.
+  */
+
+namespace internal {
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
+  : traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
+{
+  enum {
+    // Note that it is still a good idea to preserve the DirectAccessBit
+    // so that assign can correctly align the data.
+    Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
+    OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
+    InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
+  };
+};
+}
+
+template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
+  : public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
+
+    typedef typename internal::packet_traits<Scalar>::type Packet;
+
+    inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
+
+    inline Index rows() const { return m_matrix.rows(); }
+    inline Index cols() const { return m_matrix.cols(); }
+    inline Index outerStride() const { return m_matrix.outerStride(); }
+    inline Index innerStride() const { return m_matrix.innerStride(); }
+    inline const Scalar* data() const { return m_matrix.data(); }
+
+    // note that this function is needed by assign to correctly align loads/stores
+    // TODO make Assign use .data()
+    inline Scalar& coeffRef(Index row, Index col)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(Lhs)
+      return m_matrix.const_cast_derived().coeffRef(row, col);
+    }
+    inline const Scalar& coeffRef(Index row, Index col) const
+    {
+      return m_matrix.coeffRef(row, col);
+    }
+
+    // note that this function is needed by assign to correctly align loads/stores
+    // TODO make Assign use .data()
+    inline Scalar& coeffRef(Index index)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(Lhs)
+      return m_matrix.const_cast_derived().coeffRef(index);
+    }
+    inline const Scalar& coeffRef(Index index) const
+    {
+      return m_matrix.const_cast_derived().coeffRef(index);
+    }
+
+    template<typename OtherDerived>
+    void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(row >= 0 && row < rows()
+                         && col >= 0 && col < cols());
+      Scalar& tmp = m_matrix.coeffRef(row,col);
+      tmp = m_functor(tmp, _other.coeff(row,col));
+    }
+
+    template<typename OtherDerived>
+    void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(index >= 0 && index < m_matrix.size());
+      Scalar& tmp = m_matrix.coeffRef(index);
+      tmp = m_functor(tmp, _other.coeff(index));
+    }
+
+    template<typename OtherDerived, int StoreMode, int LoadMode>
+    void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(row >= 0 && row < rows()
+                        && col >= 0 && col < cols());
+      m_matrix.template writePacket<StoreMode>(row, col,
+        m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
+    }
+
+    template<typename OtherDerived, int StoreMode, int LoadMode>
+    void copyPacket(Index index, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(index >= 0 && index < m_matrix.size());
+      m_matrix.template writePacket<StoreMode>(index,
+        m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
+    }
+
+    // reimplement lazyAssign to handle complex *= real
+    // see CwiseBinaryOp ctor for details
+    template<typename RhsDerived>
+    EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
+    {
+      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
+      EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
+      
+    #ifdef EIGEN_DEBUG_ASSIGN
+      internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
+    #endif
+      eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
+      internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
+    #ifndef EIGEN_NO_DEBUG
+      this->checkTransposeAliasing(rhs.derived());
+    #endif
+      return *this;
+    }
+    
+    // overloaded to honor evaluation of special matrices
+    // maybe another solution would be to not use SelfCwiseBinaryOp
+    // at first...
+    SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
+    {
+      typename internal::nested<Rhs>::type rhs(_rhs);
+      return Base::operator=(rhs);
+    }
+
+    Lhs& expression() const 
+    { 
+      return m_matrix;
+    }
+
+    const BinaryOp& functor() const 
+    { 
+      return m_functor;
+    }
+
+  protected:
+    Lhs& m_matrix;
+    const BinaryOp& m_functor;
+
+  private:
+    SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&);
+};
+
 template<typename Derived>
 inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
  typedef typename Derived::PlainObject PlainObject;
-  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>());
-  return derived();
-}
-
-template<typename Derived>
-inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
-{
-  typedef typename Derived::PlainObject PlainObject;
-  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>());
-  return derived();
-}
-
-template<typename Derived>
-inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
-{
-  typedef typename Derived::PlainObject PlainObject;
-  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>());
+  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+  tmp = PlainObject::Constant(rows(),cols(),other);
  return derived();
 }

 template<typename Derived>
 inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
+  typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
+                                        internal::scalar_quotient_op<Scalar>,
+                                        internal::scalar_product_op<Scalar> >::type BinOp;
  typedef typename Derived::PlainObject PlainObject;
-  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>());
+  SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
+  tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
  return derived();
 }

--- a/Eigen/src/Core/Solve.h
+++ b/Eigen/src/Core/Solve.h
@@ -1,150 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_SOLVE_H
-#define EIGEN_SOLVE_H
-
-namespace Eigen {
-
-template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
-  
-/** \class Solve
-  * \ingroup Core_Module
-  *
-  * \brief Pseudo expression representing a solving operation
-  *
-  * \tparam Decomposition the type of the matrix or decomposion object
-  * \tparam Rhstype the type of the right-hand side
-  *
-  * This class represents an expression of A.solve(B)
-  * and most of the time this is the only way it is used.
-  *
-  */
-namespace internal {
-
-// this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse)
-template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;
-
-template<typename Decomposition, typename RhsType>
-struct solve_traits<Decomposition,RhsType,Dense>
-{
-  typedef typename Decomposition::MatrixType MatrixType;
-  typedef Matrix<typename RhsType::Scalar,
-                 MatrixType::ColsAtCompileTime,
-                 RhsType::ColsAtCompileTime,
-                 RhsType::PlainObject::Options,
-                 MatrixType::MaxColsAtCompileTime,
-                 RhsType::MaxColsAtCompileTime> PlainObject;  
-};
-
-template<typename Decomposition, typename RhsType>
-struct traits<Solve<Decomposition, RhsType> >
-  : traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>
-{
-  typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;
-  typedef typename promote_index_type<typename Decomposition::StorageIndex, typename RhsType::StorageIndex>::type StorageIndex;
-  typedef traits<PlainObject> BaseTraits;
-  enum {
-    Flags = BaseTraits::Flags & RowMajorBit,
-    CoeffReadCost = Dynamic
-  };
-};
-
-}
-
-
-template<typename Decomposition, typename RhsType>
-class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>
-{
-public:
-  typedef typename internal::traits<Solve>::PlainObject PlainObject;
-  typedef typename internal::traits<Solve>::StorageIndex StorageIndex;
-  
-  Solve(const Decomposition &dec, const RhsType &rhs)
-    : m_dec(dec), m_rhs(rhs)
-  {}
-  
-  EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
-  EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
-
-  EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
-  EIGEN_DEVICE_FUNC const RhsType&       rhs() const { return m_rhs; }
-
-protected:
-  const Decomposition &m_dec;
-  const RhsType       &m_rhs;
-};
-
-
-// Specialization of the Solve expression for dense results
-template<typename Decomposition, typename RhsType>
-class SolveImpl<Decomposition,RhsType,Dense>
-  : public MatrixBase<Solve<Decomposition,RhsType> >
-{
-  typedef Solve<Decomposition,RhsType> Derived;
-  
-public:
-  
-  typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-
-private:
-  
-  Scalar coeff(Index row, Index col) const;
-  Scalar coeff(Index i) const;
-};
-
-// Generic API dispatcher
-template<typename Decomposition, typename RhsType, typename StorageKind>
-class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type
-{
-  public:
-    typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;
-};
-
-namespace internal {
-
-// Evaluator of Solve -> eval into a temporary
-template<typename Decomposition, typename RhsType>
-struct evaluator<Solve<Decomposition,RhsType> >
-  : public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>
-{
-  typedef Solve<Decomposition,RhsType> SolveType;
-  typedef typename SolveType::PlainObject PlainObject;
-  typedef evaluator<PlainObject> Base;
-  
-  EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
-    : m_result(solve.rows(), solve.cols())
-  {
-    ::new (static_cast<Base*>(this)) Base(m_result);
-    solve.dec()._solve_impl(solve.rhs(), m_result);
-  }
-  
-protected:  
-  PlainObject m_result;
-};
-
-// Specialization for "dst = dec.solve(rhs)"
-// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
-template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
-struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
-{
-  typedef Solve<DecType,RhsType> SrcXprType;
-  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
-  {
-    // FIXME shall we resize dst here?
-    src.dec()._solve_impl(src.rhs(), dst);
-  }
-};
-
-} // end namepsace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_SOLVE_H
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -68,7 +68,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
    if(!useRhsDirectly)
      MappedRhs(actualRhs,rhs.size()) = rhs;

-    triangular_solve_vector<LhsScalar, RhsScalar, Index, Side, Mode, LhsProductTraits::NeedToConjugate,
+    triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
                            (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
      ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);

@@ -82,6 +82,7 @@ template<typename Lhs, typename Rhs, int Side, int Mode>
 struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
 {
  typedef typename Rhs::Scalar Scalar;
+  typedef typename Rhs::Index Index;
  typedef blas_traits<Lhs> LhsProductTraits;
  typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;

@@ -95,7 +96,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
    typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;

-    BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
+    BlockingType blocking(rhs.rows(), rhs.cols(), size);

    triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
                               (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
@@ -170,10 +171,10 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
  */
 template<typename MatrixType, unsigned int Mode>
 template<int Side, typename OtherDerived>
-void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
+void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
 {
  OtherDerived& other = _other.const_cast_derived();
-  eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
+  eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
  eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));

  enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit  && OtherDerived::IsVectorAtCompileTime };
@@ -182,7 +183,7 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
  OtherCopy otherCopy(other);

  internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
-    Side, Mode>::run(derived().nestedExpression(), otherCopy);
+    Side, Mode>::run(nestedExpression(), otherCopy);

  if (copy)
    other = otherCopy;
@@ -198,8 +199,8 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
  * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
  * is an upper (resp. lower) triangular matrix.
  *
-  * Example: \include Triangular_solve.cpp
-  * Output: \verbinclude Triangular_solve.out
+  * Example: \include MatrixBase_marked.cpp
+  * Output: \verbinclude MatrixBase_marked.out
  *
  * This function returns an expression of the inverse-multiply and can works in-place if it is assigned
  * to the same matrix or vector \a other.
@@ -212,9 +213,9 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
 template<typename Derived, unsigned int Mode>
 template<int Side, typename Other>
 const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
-TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
+TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const
 {
-  return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
+  return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived());
 }

 namespace internal {
@@ -231,6 +232,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
 {
  typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
  typedef ReturnByValue<triangular_solve_retval> Base;
+  typedef typename Base::Index Index;

  triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
    : m_triangularMatrix(tri), m_rhs(rhs)
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -17,131 +17,19 @@ namespace internal {
 template<typename ExpressionType, typename Scalar>
 inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
 {
-  Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
-  
-  if(maxCoeff>scale)
+  Scalar max = bl.cwiseAbs().maxCoeff();
+  if (max>scale)
  {
-    ssq = ssq * numext::abs2(scale/maxCoeff);
-    Scalar tmp = Scalar(1)/maxCoeff;
-    if(tmp > NumTraits<Scalar>::highest())
-    {
-      invScale = NumTraits<Scalar>::highest();
-      scale = Scalar(1)/invScale;
-    }
-    else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
-    {
-      invScale = Scalar(1);
-      scale = maxCoeff;
-    }
-    else
-    {
-      scale = maxCoeff;
-      invScale = tmp;
-    }
+    ssq = ssq * abs2(scale/max);
+    scale = max;
+    invScale = Scalar(1)/scale;
  }
-  else if(maxCoeff!=maxCoeff) // we got a NaN
-  {
-    scale = maxCoeff;
-  }
-  
-  // TODO if the maxCoeff is much much smaller than the current scale,
+  // TODO if the max is much much smaller than the current scale,
  // then we can neglect this sub vector
-  if(scale>Scalar(0)) // if scale==0, then bl is 0 
-    ssq += (bl*invScale).squaredNorm();
+  ssq += (bl*invScale).squaredNorm();
 }
-
-template<typename Derived>
-inline typename NumTraits<typename traits<Derived>::Scalar>::Real
-blueNorm_impl(const EigenBase<Derived>& _vec)
-{
-  typedef typename Derived::RealScalar RealScalar;  
-  using std::pow;
-  using std::sqrt;
-  using std::abs;
-  const Derived& vec(_vec.derived());
-  static bool initialized = false;
-  static RealScalar b1, b2, s1m, s2m, rbig, relerr;
-  if(!initialized)
-  {
-    int ibeta, it, iemin, iemax, iexp;
-    RealScalar eps;
-    // This program calculates the machine-dependent constants
-    // bl, b2, slm, s2m, relerr overfl
-    // from the "basic" machine-dependent numbers
-    // nbig, ibeta, it, iemin, iemax, rbig.
-    // The following define the basic machine-dependent constants.
-    // For portability, the PORT subprograms "ilmaeh" and "rlmach"
-    // are used. For any specific computer, each of the assignment
-    // statements can be replaced
-    ibeta = std::numeric_limits<RealScalar>::radix;                 // base for floating-point numbers
-    it    = std::numeric_limits<RealScalar>::digits;                // number of base-beta digits in mantissa
-    iemin = std::numeric_limits<RealScalar>::min_exponent;          // minimum exponent
-    iemax = std::numeric_limits<RealScalar>::max_exponent;          // maximum exponent
-    rbig  = (std::numeric_limits<RealScalar>::max)();               // largest floating-point number
-
-    iexp  = -((1-iemin)/2);
-    b1    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // lower boundary of midrange
-    iexp  = (iemax + 1 - it)/2;
-    b2    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // upper boundary of midrange
-
-    iexp  = (2-iemin)/2;
-    s1m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for lower range
-    iexp  = - ((iemax+it)/2);
-    s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for upper range
-
-    eps     = RealScalar(pow(double(ibeta), 1-it));
-    relerr  = sqrt(eps);                                            // tolerance for neglecting asml
-    initialized = true;
-  }
-  Index n = vec.size();
-  RealScalar ab2 = b2 / RealScalar(n);
-  RealScalar asml = RealScalar(0);
-  RealScalar amed = RealScalar(0);
-  RealScalar abig = RealScalar(0);
-  for(typename Derived::InnerIterator it(vec, 0); it; ++it)
-  {
-    RealScalar ax = abs(it.value());
-    if(ax > ab2)     abig += numext::abs2(ax*s2m);
-    else if(ax < b1) asml += numext::abs2(ax*s1m);
-    else             amed += numext::abs2(ax);
-  }
-  if(amed!=amed)
-    return amed;  // we got a NaN
-  if(abig > RealScalar(0))
-  {
-    abig = sqrt(abig);
-    if(abig > rbig) // overflow, or *this contains INF values
-      return abig;  // return INF
-    if(amed > RealScalar(0))
-    {
-      abig = abig/s2m;
-      amed = sqrt(amed);
-    }
-    else
-      return abig/s2m;
-  }
-  else if(asml > RealScalar(0))
-  {
-    if (amed > RealScalar(0))
-    {
-      abig = sqrt(amed);
-      amed = sqrt(asml) / s1m;
-    }
-    else
-      return sqrt(asml)/s1m;
-  }
-  else
-    return sqrt(amed);
-  asml = numext::mini(abig, amed);
-  abig = numext::maxi(abig, amed);
-  if(asml <= abig*relerr)
-    return abig;
-  else
-    return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
 }

-} // end namespace internal
-
 /** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
  * This version use a blockwise two passes algorithm:
  *  1 - find the absolute largest coefficient \c s
@@ -156,34 +44,21 @@ template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::stableNorm() const
 {
-  using std::sqrt;
-  using std::abs;
+  using std::min;
  const Index blockSize = 4096;
  RealScalar scale(0);
  RealScalar invScale(1);
  RealScalar ssq(0); // sum of square
-  
-  typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
-  typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
-  DerivedCopy copy(derived());
-  
  enum {
-    CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME
+    Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
  };
-  typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
-                                                   typename DerivedCopyClean
-                                                   ::ConstSegmentReturnType>::type SegmentWrapper;
  Index n = size();
-  
-  if(n==1)
-    return abs(this->coeff(0));
-  
-  Index bi = internal::first_default_aligned(copy);
+  Index bi = internal::first_aligned(derived());
  if (bi>0)
-    internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
+    internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
  for (; bi<n; bi+=blockSize)
-    internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
-  return scale * sqrt(ssq);
+    internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
+  return scale * internal::sqrt(ssq);
 }

 /** \returns the \em l2 norm of \c *this using the Blue's algorithm.
@@ -199,7 +74,90 @@ template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::blueNorm() const
 {
-  return internal::blueNorm_impl(*this);
+  using std::pow;
+  using std::min;
+  using std::max;
+  static bool initialized = false;
+  static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
+  if(!initialized)
+  {
+    int ibeta, it, iemin, iemax, iexp;
+    RealScalar abig, eps;
+    // This program calculates the machine-dependent constants
+    // bl, b2, slm, s2m, relerr overfl
+    // from the "basic" machine-dependent numbers
+    // ibeta, it, iemin, iemax, rbig.
+    // The following define the basic machine-dependent constants.
+    // For portability, the PORT subprograms "ilmaeh" and "rlmach"
+    // are used. For any specific computer, each of the assignment
+    // statements can be replaced
+    ibeta = std::numeric_limits<RealScalar>::radix;         // base for floating-point numbers
+    it    = std::numeric_limits<RealScalar>::digits;        // number of base-beta digits in mantissa
+    iemin = std::numeric_limits<RealScalar>::min_exponent;  // minimum exponent
+    iemax = std::numeric_limits<RealScalar>::max_exponent;  // maximum exponent
+    rbig  = (std::numeric_limits<RealScalar>::max)();         // largest floating-point number
+
+    iexp  = -((1-iemin)/2);
+    b1    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));  // lower boundary of midrange
+    iexp  = (iemax + 1 - it)/2;
+    b2    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // upper boundary of midrange
+
+    iexp  = (2-iemin)/2;
+    s1m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for lower range
+    iexp  = - ((iemax+it)/2);
+    s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));   // scaling factor for upper range
+
+    overfl  = rbig*s2m;             // overflow boundary for abig
+    eps     = RealScalar(pow(double(ibeta), 1-it));
+    relerr  = internal::sqrt(eps);         // tolerance for neglecting asml
+    abig    = RealScalar(1.0/eps - 1.0);
+    initialized = true;
+  }
+  Index n = size();
+  RealScalar ab2 = b2 / RealScalar(n);
+  RealScalar asml = RealScalar(0);
+  RealScalar amed = RealScalar(0);
+  RealScalar abig = RealScalar(0);
+  for(Index j=0; j<n; ++j)
+  {
+    RealScalar ax = internal::abs(coeff(j));
+    if(ax > ab2)     abig += internal::abs2(ax*s2m);
+    else if(ax < b1) asml += internal::abs2(ax*s1m);
+    else             amed += internal::abs2(ax);
+  }
+  if(abig > RealScalar(0))
+  {
+    abig = internal::sqrt(abig);
+    if(abig > overfl)
+    {
+      return rbig;
+    }
+    if(amed > RealScalar(0))
+    {
+      abig = abig/s2m;
+      amed = internal::sqrt(amed);
+    }
+    else
+      return abig/s2m;
+  }
+  else if(asml > RealScalar(0))
+  {
+    if (amed > RealScalar(0))
+    {
+      abig = internal::sqrt(amed);
+      amed = internal::sqrt(asml) / s1m;
+    }
+    else
+      return internal::sqrt(asml)/s1m;
+  }
+  else
+    return internal::sqrt(amed);
+  asml = (min)(abig, amed);
+  abig = (max)(abig, amed);
+  if(asml <= abig*relerr)
+    return abig;
+  else
+    return abig * internal::sqrt(RealScalar(1) + internal::abs2(asml/abig));
 }

 /** \returns the \em l2 norm of \c *this avoiding undeflow and overflow.
--- a/Eigen/src/Core/Stride.h
+++ b/Eigen/src/Core/Stride.h
@@ -44,14 +44,13 @@ template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
 class Stride
 {
  public:
-    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    typedef DenseIndex Index;
    enum {
      InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
      OuterStrideAtCompileTime = _OuterStrideAtCompileTime
    };

    /** Default constructor, for use when strides are fixed at compile time */
-    EIGEN_DEVICE_FUNC
    Stride()
      : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
    {
@@ -59,7 +58,6 @@ class Stride
    }

    /** Constructor allowing to pass the strides at runtime */
-    EIGEN_DEVICE_FUNC
    Stride(Index outerStride, Index innerStride)
      : m_outer(outerStride), m_inner(innerStride)
    {
@@ -67,16 +65,13 @@ class Stride
    }

    /** Copy constructor */
-    EIGEN_DEVICE_FUNC
    Stride(const Stride& other)
      : m_outer(other.outer()), m_inner(other.inner())
    {}

    /** \returns the outer stride */
-    EIGEN_DEVICE_FUNC
    inline Index outer() const { return m_outer.value(); }
    /** \returns the inner stride */
-    EIGEN_DEVICE_FUNC
    inline Index inner() const { return m_inner.value(); }

  protected:
@@ -86,24 +81,26 @@ class Stride

 /** \brief Convenience specialization of Stride to specify only an inner stride
  * See class Map for some examples */
-template<int Value>
+template<int Value = Dynamic>
 class InnerStride : public Stride<0, Value>
 {
    typedef Stride<0, Value> Base;
  public:
-    EIGEN_DEVICE_FUNC InnerStride() : Base() {}
-    EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code
+    typedef DenseIndex Index;
+    InnerStride() : Base() {}
+    InnerStride(Index v) : Base(0, v) {}
 };

 /** \brief Convenience specialization of Stride to specify only an outer stride
  * See class Map for some examples */
-template<int Value>
+template<int Value = Dynamic>
 class OuterStride : public Stride<Value, 0>
 {
    typedef Stride<Value, 0> Base;
  public:
-    EIGEN_DEVICE_FUNC OuterStride() : Base() {}
-    EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} // FIXME making this explicit could break valid code
+    typedef DenseIndex Index;
+    OuterStride() : Base() {}
+    OuterStride(Index v) : Base(v,0) {}
 };

 } // end namespace Eigen
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -12,55 +12,114 @@

 namespace Eigen { 

+/** \class SwapWrapper
+  * \ingroup Core_Module
+  *
+  * \internal
+  *
+  * \brief Internal helper class for swapping two expressions
+  */
 namespace internal {
+template<typename ExpressionType>
+struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {};
+}

-// Overload default assignPacket behavior for swapping them
-template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
-class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized>
- : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn>
+template<typename ExpressionType> class SwapWrapper
+  : public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type
 {
-protected:
-  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
-  using Base::m_dst;
-  using Base::m_src;
-  using Base::m_functor;
-  
-public:
-  typedef typename Base::Scalar Scalar;
-  typedef typename Base::DstXprType DstXprType;
-  typedef swap_assign_op<Scalar> Functor;
-  
-  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
-    : Base(dst, src, func, dstExpr)
-  {}
-  
-  template<int StoreMode, int LoadMode, typename PacketType>
-  void assignPacket(Index row, Index col)
-  {
-    PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);
-    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));
-    m_dst.template writePacket<StoreMode>(row,col,tmp);
-  }
-  
-  template<int StoreMode, int LoadMode, typename PacketType>
-  void assignPacket(Index index)
-  {
-    PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);
-    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
-    m_dst.template writePacket<StoreMode>(index,tmp);
-  }
-  
-  // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
-  template<int StoreMode, int LoadMode, typename PacketType>
-  void assignPacketByOuterInner(Index outer, Index inner)
-  {
-    Index row = Base::rowIndexByOuterInner(outer, inner); 
-    Index col = Base::colIndexByOuterInner(outer, inner);
-    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
-  }
-};
+  public:

-} // namespace internal
+    typedef typename internal::dense_xpr_base<SwapWrapper>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
+    typedef typename internal::packet_traits<Scalar>::type Packet;
+
+    inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
+
+    inline Index rows() const { return m_expression.rows(); }
+    inline Index cols() const { return m_expression.cols(); }
+    inline Index outerStride() const { return m_expression.outerStride(); }
+    inline Index innerStride() const { return m_expression.innerStride(); }
+    
+    typedef typename internal::conditional<
+                       internal::is_lvalue<ExpressionType>::value,
+                       Scalar,
+                       const Scalar
+                     >::type ScalarWithConstIfNotLvalue;
+                     
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+    inline const Scalar* data() const { return m_expression.data(); }
+
+    inline Scalar& coeffRef(Index row, Index col)
+    {
+      return m_expression.const_cast_derived().coeffRef(row, col);
+    }
+
+    inline Scalar& coeffRef(Index index)
+    {
+      return m_expression.const_cast_derived().coeffRef(index);
+    }
+
+    inline Scalar& coeffRef(Index row, Index col) const
+    {
+      return m_expression.coeffRef(row, col);
+    }
+
+    inline Scalar& coeffRef(Index index) const
+    {
+      return m_expression.coeffRef(index);
+    }
+
+    template<typename OtherDerived>
+    void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(row >= 0 && row < rows()
+                         && col >= 0 && col < cols());
+      Scalar tmp = m_expression.coeff(row, col);
+      m_expression.coeffRef(row, col) = _other.coeff(row, col);
+      _other.coeffRef(row, col) = tmp;
+    }
+
+    template<typename OtherDerived>
+    void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(index >= 0 && index < m_expression.size());
+      Scalar tmp = m_expression.coeff(index);
+      m_expression.coeffRef(index) = _other.coeff(index);
+      _other.coeffRef(index) = tmp;
+    }
+
+    template<typename OtherDerived, int StoreMode, int LoadMode>
+    void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(row >= 0 && row < rows()
+                        && col >= 0 && col < cols());
+      Packet tmp = m_expression.template packet<StoreMode>(row, col);
+      m_expression.template writePacket<StoreMode>(row, col,
+        _other.template packet<LoadMode>(row, col)
+      );
+      _other.template writePacket<LoadMode>(row, col, tmp);
+    }
+
+    template<typename OtherDerived, int StoreMode, int LoadMode>
+    void copyPacket(Index index, const DenseBase<OtherDerived>& other)
+    {
+      OtherDerived& _other = other.const_cast_derived();
+      eigen_internal_assert(index >= 0 && index < m_expression.size());
+      Packet tmp = m_expression.template packet<StoreMode>(index);
+      m_expression.template writePacket<StoreMode>(index,
+        _other.template packet<LoadMode>(index)
+      );
+      _other.template writePacket<LoadMode>(index, tmp);
+    }
+
+    ExpressionType& expression() const { return m_expression; }
+
+  protected:
+    ExpressionType& m_expression;
+};

 } // end namespace Eigen

--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -29,10 +29,13 @@ namespace Eigen {

 namespace internal {
 template<typename MatrixType>
-struct traits<Transpose<MatrixType> > : public traits<MatrixType>
+struct traits<Transpose<MatrixType> > : traits<MatrixType>
 {
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename nested<MatrixType>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
+  typedef typename traits<MatrixType>::StorageKind StorageKind;
+  typedef typename traits<MatrixType>::XprKind XprKind;
  enum {
    RowsAtCompileTime = MatrixType::ColsAtCompileTime,
    ColsAtCompileTime = MatrixType::RowsAtCompileTime,
@@ -42,6 +45,7 @@ struct traits<Transpose<MatrixType> > : public traits<MatrixType>
    Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
    Flags1 = Flags0 | FlagsLvalueBit,
    Flags = Flags1 ^ RowMajorBit,
+    CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
    InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
    OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
  };
@@ -57,23 +61,19 @@ template<typename MatrixType> class Transpose

    typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
-    typedef typename internal::remove_all<MatrixType>::type NestedExpression;

-    EIGEN_DEVICE_FUNC
-    explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
+    inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)

-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
+    inline Index rows() const { return m_matrix.cols(); }
+    inline Index cols() const { return m_matrix.rows(); }

    /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename MatrixType::Nested>::type&
    nestedExpression() const { return m_matrix; }

    /** \returns the nested expression */
-    EIGEN_DEVICE_FUNC
    typename internal::remove_all<typename MatrixType::Nested>::type&
    nestedExpression() { return m_matrix.const_cast_derived(); }

@@ -97,27 +97,16 @@ struct TransposeImpl_base<MatrixType, false>

 } // end namespace internal

-// Generic API dispatcher
-template<typename XprType, typename StorageKind>
-class TransposeImpl
-  : public internal::generic_xpr_base<Transpose<XprType> >::type
-{
-public:
-  typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
-};
-
 template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
  : public internal::TransposeImpl_base<MatrixType>::type
 {
  public:

    typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
-    using Base::coeffRef;
    EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)

-    EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
-    EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+    inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
+    inline Index outerStride() const { return derived().nestedExpression().outerStride(); }

    typedef typename internal::conditional<
                       internal::is_lvalue<MatrixType>::value,
@@ -125,21 +114,64 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

-    EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
-    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
+    inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
+    inline const Scalar* data() const { return derived().nestedExpression().data(); }

-    // FIXME: shall we keep the const version of coeffRef?
-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
    {
-      return derived().nestedExpression().coeffRef(colId, rowId);
+      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+      return derived().nestedExpression().const_cast_derived().coeffRef(col, row);
+    }
+
+    inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+      return derived().nestedExpression().const_cast_derived().coeffRef(index);
+    }
+
+    inline const Scalar& coeffRef(Index row, Index col) const
+    {
+      return derived().nestedExpression().coeffRef(col, row);
    }

-    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
      return derived().nestedExpression().coeffRef(index);
    }
+
+    inline CoeffReturnType coeff(Index row, Index col) const
+    {
+      return derived().nestedExpression().coeff(col, row);
+    }
+
+    inline CoeffReturnType coeff(Index index) const
+    {
+      return derived().nestedExpression().coeff(index);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index row, Index col) const
+    {
+      return derived().nestedExpression().template packet<LoadMode>(col, row);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
+    {
+      derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(col, row, x);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index index) const
+    {
+      return derived().nestedExpression().template packet<LoadMode>(index);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index index, const PacketScalar& x)
+    {
+      derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x);
+    }
 };

 /** \returns an expression of the transpose of *this.
@@ -165,7 +197,7 @@ template<typename Derived>
 inline Transpose<Derived>
 DenseBase<Derived>::transpose()
 {
-  return TransposeReturnType(derived());
+  return derived();
 }

 /** This is the const version of transpose().
@@ -174,7 +206,7 @@ DenseBase<Derived>::transpose()
  *
  * \sa transposeInPlace(), adjoint() */
 template<typename Derived>
-inline typename DenseBase<Derived>::ConstTransposeReturnType
+inline const typename DenseBase<Derived>::ConstTransposeReturnType
 DenseBase<Derived>::transpose() const
 {
  return ConstTransposeReturnType(derived());
@@ -203,7 +235,8 @@ template<typename Derived>
 inline const typename MatrixBase<Derived>::AdjointReturnType
 MatrixBase<Derived>::adjoint() const
 {
-  return AdjointReturnType(this->transpose());
+  return this->transpose(); // in the complex case, the .conjugate() is be implicit here
+                            // due to implicit conversion to return type
 }

 /***************************************************************************
@@ -213,41 +246,21 @@ MatrixBase<Derived>::adjoint() const
 namespace internal {

 template<typename MatrixType,
-  bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
-  bool MatchPacketSize =
-        (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
-    &&  (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
+  bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
 struct inplace_transpose_selector;

 template<typename MatrixType>
-struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
+struct inplace_transpose_selector<MatrixType,true> { // square matrix
  static void run(MatrixType& m) {
-    m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+    m.template triangularView<StrictlyUpper>().swap(m.transpose());
  }
 };

-// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
 template<typename MatrixType>
-struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
-  static void run(MatrixType& m) {
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
-    const Index PacketSize = internal::packet_traits<Scalar>::size;
-    const Index Alignment = internal::evaluator<MatrixType>::Alignment;
-    PacketBlock<Packet> A;
-    for (Index i=0; i<PacketSize; ++i)
-      A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
-    internal::ptranspose(A);
-    for (Index i=0; i<PacketSize; ++i)
-      m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
-  }
-};
-
-template<typename MatrixType,bool MatchPacketSize>
-struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
+struct inplace_transpose_selector<MatrixType,false> { // non square matrix
  static void run(MatrixType& m) {
    if (m.rows()==m.cols())
-      m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+      m.template triangularView<StrictlyUpper>().swap(m.transpose());
    else
      m = m.transpose().eval();
  }
@@ -265,20 +278,17 @@ struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non squ
  * m = m.transpose().eval();
  * \endcode
  * and is faster and also safer because in the latter line of code, forgetting the eval() results
-  * in a bug caused by \ref TopicAliasing "aliasing".
+  * in a bug caused by aliasing.
  *
  * Notice however that this method is only useful if you want to replace a matrix by its own transpose.
  * If you just need the transpose of a matrix, use transpose().
  *
-  * \note if the matrix is not square, then \c *this must be a resizable matrix. 
-  * This excludes (non-square) fixed-size matrices, block-expressions and maps.
+  * \note if the matrix is not square, then \c *this must be a resizable matrix.
  *
  * \sa transpose(), adjoint(), adjointInPlace() */
 template<typename Derived>
 inline void DenseBase<Derived>::transposeInPlace()
 {
-  eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
-               && "transposeInPlace() called on a non-square non-resizable matrix");
  internal::inplace_transpose_selector<Derived>::run(derived());
 }

@@ -302,7 +312,6 @@ inline void DenseBase<Derived>::transposeInPlace()
  * If you just need the adjoint of a matrix, use adjoint().
  *
  * \note if the matrix is not square, then \c *this must be a resizable matrix.
-  * This excludes (non-square) fixed-size matrices, block-expressions and maps.
  *
  * \sa transpose(), adjoint(), transposeInPlace() */
 template<typename Derived>
@@ -317,6 +326,14 @@ inline void MatrixBase<Derived>::adjointInPlace()

 namespace internal {

+template<typename BinOp,typename NestedXpr,typename Rhs>
+struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
+ : blas_traits<NestedXpr>
+{
+  typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
+  static inline const XprType extract(const XprType& x) { return x; }
+};
+
 template<bool DestIsTransposed, typename OtherDerived>
 struct check_transpose_aliasing_compile_time_selector
 {
@@ -368,7 +385,7 @@ struct checkTransposeAliasing_impl
        eigen_assert((!check_transpose_aliasing_run_time_selector
                      <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
                      ::run(extract_data(dst), other))
-          && "aliasing detected during transposition, use transposeInPlace() "
+          && "aliasing detected during tranposition, use transposeInPlace() "
             "or evaluate the rhs into a temporary using .eval()");

    }
@@ -382,15 +399,15 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
    }
 };

-template<typename Dst, typename Src>
-void check_for_aliasing(const Dst &dst, const Src &src)
-{
-  internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
-}
-
 } // end namespace internal

-#endif // EIGEN_NO_DEBUG
+template<typename Derived>
+template<typename OtherDerived>
+void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const
+{
+    internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other);
+}
+#endif

 } // end namespace Eigen

--- a/Eigen/src/Core/Transpositions.h
+++ b/Eigen/src/Core/Transpositions.h
@@ -41,6 +41,10 @@ namespace Eigen {
  * \sa class PermutationMatrix
  */

+namespace internal {
+template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct transposition_matrix_product_retval;
+}
+
 template<typename Derived>
 class TranspositionsBase
 {
@@ -49,8 +53,7 @@ class TranspositionsBase
  public:

    typedef typename Traits::IndicesType IndicesType;
-    typedef typename IndicesType::Scalar StorageIndex;
-    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    typedef typename IndicesType::Scalar Index;

    Derived& derived() { return *static_cast<Derived*>(this); }
    const Derived& derived() const { return *static_cast<const Derived*>(this); }
@@ -62,7 +65,7 @@ class TranspositionsBase
      indices() = other.indices();
      return derived();
    }
-    
+
    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
@@ -75,24 +78,20 @@ class TranspositionsBase
    #endif

    /** \returns the number of transpositions */
-    Index size() const { return indices().size(); }
-    /** \returns the number of rows of the equivalent permutation matrix */
-    Index rows() const { return indices().size(); }
-    /** \returns the number of columns of the equivalent permutation matrix */
-    Index cols() const { return indices().size(); }
+    inline Index size() const { return indices().size(); }

    /** Direct access to the underlying index vector */
-    inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
+    inline const Index& coeff(Index i) const { return indices().coeff(i); }
    /** Direct access to the underlying index vector */
-    inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }
+    inline Index& coeffRef(Index i) { return indices().coeffRef(i); }
    /** Direct access to the underlying index vector */
-    inline const StorageIndex& operator()(Index i) const { return indices()(i); }
+    inline const Index& operator()(Index i) const { return indices()(i); }
    /** Direct access to the underlying index vector */
-    inline StorageIndex& operator()(Index i) { return indices()(i); }
+    inline Index& operator()(Index i) { return indices()(i); }
    /** Direct access to the underlying index vector */
-    inline const StorageIndex& operator[](Index i) const { return indices()(i); }
+    inline const Index& operator[](Index i) const { return indices()(i); }
    /** Direct access to the underlying index vector */
-    inline StorageIndex& operator[](Index i) { return indices()(i); }
+    inline Index& operator[](Index i) { return indices()(i); }

    /** const version of indices(). */
    const IndicesType& indices() const { return derived().indices(); }
@@ -100,15 +99,15 @@ class TranspositionsBase
    IndicesType& indices() { return derived().indices(); }

    /** Resizes to given size. */
-    inline void resize(Index newSize)
+    inline void resize(int size)
    {
-      indices().resize(newSize);
+      indices().resize(size);
    }

    /** Sets \c *this to represents an identity transformation */
    void setIdentity()
    {
-      for(StorageIndex i = 0; i < indices().size(); ++i)
+      for(int i = 0; i < indices().size(); ++i)
        coeffRef(i) = i;
    }

@@ -145,24 +144,23 @@ class TranspositionsBase
 };

 namespace internal {
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
-struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
- : traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
 {
-  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
-  typedef TranspositionsStorage StorageKind;
+  typedef IndexType Index;
+  typedef Matrix<Index, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
 };
 }

-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
-class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> >
 {
    typedef internal::traits<Transpositions> Traits;
  public:

    typedef TranspositionsBase<Transpositions> Base;
    typedef typename Traits::IndicesType IndicesType;
-    typedef typename IndicesType::Scalar StorageIndex;
+    typedef typename IndicesType::Scalar Index;

    inline Transpositions() {}

@@ -217,33 +215,31 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim


 namespace internal {
-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
-struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >
- : traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess>
+struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,_PacketAccess> >
 {
-  typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
-  typedef _StorageIndex StorageIndex;
-  typedef TranspositionsStorage StorageKind;
+  typedef IndexType Index;
+  typedef Map<const Matrix<Index,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
 };
 }

-template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int PacketAccess>
-class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess>
- : public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess> >
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess>
+class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess>
+ : public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> >
 {
    typedef internal::traits<Map> Traits;
  public:

    typedef TranspositionsBase<Map> Base;
    typedef typename Traits::IndicesType IndicesType;
-    typedef typename IndicesType::Scalar StorageIndex;
+    typedef typename IndicesType::Scalar Index;

-    explicit inline Map(const StorageIndex* indicesPtr)
-      : m_indices(indicesPtr)
+    inline Map(const Index* indices)
+      : m_indices(indices)
    {}

-    inline Map(const StorageIndex* indicesPtr, Index size)
-      : m_indices(indicesPtr,size)
+    inline Map(const Index* indices, Index size)
+      : m_indices(indices,size)
    {}

    /** Copies the \a other transpositions into \c *this */
@@ -278,9 +274,9 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,P
 namespace internal {
 template<typename _IndicesType>
 struct traits<TranspositionsWrapper<_IndicesType> >
- : traits<PermutationWrapper<_IndicesType> >
 {
-  typedef TranspositionsStorage StorageKind;
+  typedef typename _IndicesType::Scalar Index;
+  typedef _IndicesType IndicesType;
 };
 }

@@ -293,9 +289,9 @@ class TranspositionsWrapper

    typedef TranspositionsBase<TranspositionsWrapper> Base;
    typedef typename Traits::IndicesType IndicesType;
-    typedef typename IndicesType::Scalar StorageIndex;
+    typedef typename IndicesType::Scalar Index;

-    explicit inline TranspositionsWrapper(IndicesType& indices)
+    inline TranspositionsWrapper(IndicesType& indices)
      : m_indices(indices)
    {}

@@ -328,43 +324,80 @@ class TranspositionsWrapper
    const typename IndicesType::Nested m_indices;
 };

-
-
 /** \returns the \a matrix with the \a transpositions applied to the columns.
  */
-template<typename MatrixDerived, typename TranspositionsDerived>
-EIGEN_DEVICE_FUNC
-const Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
-operator*(const MatrixBase<MatrixDerived> &matrix,
-          const TranspositionsBase<TranspositionsDerived>& transpositions)
+template<typename Derived, typename TranspositionsDerived>
+inline const internal::transposition_matrix_product_retval<TranspositionsDerived, Derived, OnTheRight>
+operator*(const MatrixBase<Derived>& matrix,
+          const TranspositionsBase<TranspositionsDerived> &transpositions)
 {
-  return Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
-            (matrix.derived(), transpositions.derived());
+  return internal::transposition_matrix_product_retval
+           <TranspositionsDerived, Derived, OnTheRight>
+           (transpositions.derived(), matrix.derived());
 }

 /** \returns the \a matrix with the \a transpositions applied to the rows.
  */
-template<typename TranspositionsDerived, typename MatrixDerived>
-EIGEN_DEVICE_FUNC
-const Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
-operator*(const TranspositionsBase<TranspositionsDerived> &transpositions,
-          const MatrixBase<MatrixDerived>& matrix)
+template<typename Derived, typename TranspositionDerived>
+inline const internal::transposition_matrix_product_retval
+               <TranspositionDerived, Derived, OnTheLeft>
+operator*(const TranspositionsBase<TranspositionDerived> &transpositions,
+          const MatrixBase<Derived>& matrix)
 {
-  return Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
-            (transpositions.derived(), matrix.derived());
+  return internal::transposition_matrix_product_retval
+           <TranspositionDerived, Derived, OnTheLeft>
+           (transpositions.derived(), matrix.derived());
 }

-// Template partial specialization for transposed/inverse transpositions
-
 namespace internal {

-template<typename Derived>
-struct traits<Transpose<TranspositionsBase<Derived> > >
- : traits<Derived>
-{};
+template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
+struct traits<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
+{
+  typedef typename MatrixType::PlainObject ReturnType;
+};
+
+template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
+struct transposition_matrix_product_retval
+ : public ReturnByValue<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
+{
+    typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
+    typedef typename TranspositionType::Index Index;
+
+    transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
+      : m_transpositions(tr), m_matrix(matrix)
+    {}
+
+    inline int rows() const { return m_matrix.rows(); }
+    inline int cols() const { return m_matrix.cols(); }
+
+    template<typename Dest> inline void evalTo(Dest& dst) const
+    {
+      const int size = m_transpositions.size();
+      Index j = 0;
+
+      if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
+        dst = m_matrix;
+
+      for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
+        if((j=m_transpositions.coeff(k))!=k)
+        {
+          if(Side==OnTheLeft)
+            dst.row(k).swap(dst.row(j));
+          else if(Side==OnTheRight)
+            dst.col(k).swap(dst.col(j));
+        }
+    }
+
+  protected:
+    const TranspositionType& m_transpositions;
+    typename MatrixType::Nested m_matrix;
+};

 } // end namespace internal

+/* Template partial specialization for transposed/inverse transpositions */
+
 template<typename TranspositionsDerived>
 class Transpose<TranspositionsBase<TranspositionsDerived> >
 {
@@ -372,31 +405,27 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
    typedef typename TranspositionType::IndicesType IndicesType;
  public:

-    explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
+    Transpose(const TranspositionType& t) : m_transpositions(t) {}

-    Index size() const { return m_transpositions.size(); }
-    Index rows() const { return m_transpositions.size(); }
-    Index cols() const { return m_transpositions.size(); }
+    inline int size() const { return m_transpositions.size(); }

    /** \returns the \a matrix with the inverse transpositions applied to the columns.
      */
-    template<typename OtherDerived> friend
-    const Product<OtherDerived, Transpose, AliasFreeProduct>
-    operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
+    template<typename Derived> friend
+    inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>
+    operator*(const MatrixBase<Derived>& matrix, const Transpose& trt)
    {
-      return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
+      return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived());
    }

    /** \returns the \a matrix with the inverse transpositions applied to the rows.
      */
-    template<typename OtherDerived>
-    const Product<Transpose, OtherDerived, AliasFreeProduct>
-    operator*(const MatrixBase<OtherDerived>& matrix) const
+    template<typename Derived>
+    inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>
+    operator*(const MatrixBase<Derived>& matrix) const
    {
-      return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
+      return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived());
    }
-    
-    const TranspositionType& nestedExpression() const { return m_transpositions; }

  protected:
    const TranspositionType& m_transpositions;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Gael Guennebaud	2221cdbe62	bump to 3.1.3	2013-04-16 09:38:40 +02:00
Hauke Heibel	ba1e62f516	Prevent calling .norm() on integer matrices in the unit tests. (transplanted from `b5d8299ee7` )	2013-02-28 12:33:34 +01:00
Gael Guennebaud	ce2b0ac502	Fix two numerical issues in unit tests. (transplanted from `455e6e38b6` )	2013-02-27 08:07:18 +01:00
Gael Guennebaud	1f7dfcff8a	Add missing template keyword (transplanted from `858ac9ffe0` )	2013-03-01 00:03:28 +01:00
Gael Guennebaud	2234043f99	Enable SSE with ICC even when it mimics a gcc version lower than 4.2 (transplanted from `6eaff5a098` )	2013-04-11 19:48:34 +02:00
Gael Guennebaud	69ff8afea7	Workaround gcc-4.7 bug #53900 (too aggressive optimization in our alignment check) (transplanted from `19c78cf510` )	2013-01-22 22:59:09 +01:00
Gael Guennebaud	64a6d37729	Fix a serious bug in handmade_aligned_realloc: original data have to be moved if the alignment offset differs. (transplanted from `7e04d7db02` )	2013-04-10 13:58:20 +02:00
Gael Guennebaud	4ac874ed03	Upload CDASH submissions for the 3.1 branch to a separate project	2013-04-10 10:06:36 +02:00
Gael Guennebaud	0029599c4a	Fix bug #581 : remove useless piece of code is blueNorm (transplanted from `8f44205671` )	2013-04-09 09:23:40 +02:00
Claas H. Köhler	f78dffffda	Forward compiler flags to Fortran workaround (transplanted from `d6d638c751` )	2013-03-17 14:17:44 +01:00
Gael Guennebaud	e304a92f41	fix sparse vector assignment from a sparse matrix (transplanted from `98ce4455dd` )	2013-03-06 11:58:22 +01:00
Gael Guennebaud	2674a31421	Fix a compilation with CGAL::Gmpq by adding explicit internal:: namespace when calling abs().	2013-02-26 16:46:10 +01:00
Gael Guennebaud	de25881056	Fix computation of outer-stride when calling .real() or .imag() (transplanted from `63135a7350` )	2013-02-26 15:08:50 +01:00
Jitse Niesen	7df8b57770	Fix linear vectorized transversal in linspace (fixes bug #526 ). (transplanted from `b4f6aec195` )	2013-02-18 17:26:03 +00:00
Gael Guennebaud	ddba6054e0	Push cdash report of the 3.1 branch in its own cdash subproject	2013-02-15 15:30:27 +01:00
Gael Guennebaud	6adc13ea04	Fix SSE plog<float> to return -INF on 0 (transplanted from `8745da14d8` )	2013-02-14 23:34:05 +01:00
Gael Guennebaud	66cbfd4d39	Fix some implicit int64 to int conversion warnings. However, the real issue is that PermutationMatrix mixes the type of the stored indices and the "Index" type used for the sizes, coeff indices, etc., which should be DenseIndex.	2013-02-14 18:16:51 +01:00
Gael Guennebaud	394784c999	Fix bug in aligned_free with windows CE (transplanted from `25bcbfb10c` )	2013-02-13 19:09:31 +01:00
Gael Guennebaud	fcc46f49ca	Fix bug #551 : compilation issue when using EIGEN_DEFAULT_DENSE_INDEX_TYPE	2013-02-09 09:43:17 +01:00
Gael Guennebaud	92983fc95a	Fix traits of Map<Quaternion>, and respectively extend the unit tests (transplanted from `392ffce3b9` )	2013-01-20 10:21:54 +01:00
Gael Guennebaud	d5702fb7e9	Some minor documentation fixes in Quaternion (transplanted from `fb89b66229` )	2013-01-20 10:20:39 +01:00
Christoph Hertzberg	8aaa570c6d	Fix bug #507 : Mark variable as unused in NDEBUG case	2012-12-20 11:21:47 +01:00
Christoph Hertzberg	8c65cacad8	Fix bug #531 : Empty line in <table> made doxygen render it as paragraphs	2012-12-17 16:13:42 +01:00
Gael Guennebaud	2041114285	Fix bug #533 : add some missing const qualifiers (was already fixed in devel branch)	2012-12-16 20:36:59 +01:00
Gael Guennebaud	ac406a7685	Fix bug #535 : unused variable warnings (transplanted from `925a5b7d07` )	2012-12-16 20:21:28 +01:00
Gael Guennebaud	45ccaacc54	fix geometry tutorial (transplanted from `8719b1bf16` )	2012-11-29 22:48:13 +08:00
Gael Guennebaud	43e90e3575	Added tag 3.1.2 for changeset `63c58c8436`	2012-11-05 22:23:03 +01:00
Gael Guennebaud	63c58c8436	bump to 3.1.2	2012-11-05 22:22:49 +01:00
Gael Guennebaud	caf24f1c9e	Disable opengl demo if Qt4 or OpenGL cannot be found.	2012-10-31 11:36:45 +01:00
Gael Guennebaud	f7b959b5fb	Fix unused variable warning	2012-10-30 10:10:29 +01:00
Gael Guennebaud	ad27746b5d	fix bug #524 : Pardiso's parameter array does not have to be aligned! (transplanted from `b3254c9af5` )	2012-10-24 10:31:04 +02:00
Gael Guennebaud	90912315ac	fix bug #521 : __cpuidex is not available on all architectures supported by MSVC (transplanted from `138897cc06` )	2012-10-24 10:21:41 +02:00
Gael Guennebaud	26e9563baf	Windows CE does not provide an aligned_malloc function. (transplanted from `9b418afff6` )	2012-10-24 10:12:42 +02:00
Gael Guennebaud	85c40128e4	Fix bug #519 : AlignedBox::dim() was wrong for dynamic dimensions (transplanted from `0753463d70` )	2012-10-24 09:58:35 +02:00
Gael Guennebaud	7b13a7fd23	fix comma initializer when inserting empty matrices (transplanted from `a67eea05c1` )	2012-10-03 21:58:14 +02:00
Gael Guennebaud	2f0307cdb5	fix dense=sparse*diagonal (there was an issue in the values returned by the .outer() function of the related iterators) (transplanted from `fec6df1f7d` )	2012-10-03 09:06:19 +02:00
Gael Guennebaud	749317561c	add an assertion when inserting an already existing element	2012-10-02 23:02:23 +02:00
Gael Guennebaud	dc5b335f9f	add scalar multiple to diagonal matrices	2012-09-27 09:37:05 +02:00
Gael Guennebaud	74a2a0f224	fix SparseMatrix option bit flag in eval<> helper (transplanted from `1b004d5794` )	2012-09-27 09:22:10 +02:00
Gael Guennebaud	b0862dcb2f	fix bug #515 : missing explicit scalar conversion	2012-09-27 00:23:19 +02:00
Gael Guennebaud	ba013de9da	fix bug #511 : pretty printers on windows (transplanted from `44374788b5` )	2012-09-26 23:48:48 +02:00
Gael Guennebaud	4ea9113efc	fix bug #509 : warning with gcc 4.7 (transplanted from `7c4b55fda9` )	2012-09-26 23:32:22 +02:00
Gael Guennebaud	76a2db4c74	workaround weird compilation error with MSVC (transplanted from `48c4d48aec` )	2012-09-14 09:54:56 +02:00
Gael Guennebaud	53a0142583	fix compilation with m.array().min/max(scalar) (transplanted from `0c584dcf4d` )	2012-09-12 17:50:07 +02:00
Benoit Jacob	733fd6e7ba	Replace COPYING.LGPL by a copy of the LGPL 2.1 (instead of LGPL 3). Indeed, all the LGPL code we use, is licensed under LGPL 2.1 (with some files being "2.1 or later").	2012-09-10 13:27:44 -04:00
Gael Guennebaud	26282498dc	fix bug #501 : remove aggressive mat/scalar optimization (was replaced by mat*(1/scalar) for non integer types) (transplanted from `721671cc4e` )	2012-09-08 11:52:03 +02:00
Gael Guennebaud	f4bdc66e83	remove stupid assert in blue norm. (transplanted from `06d2fe453d` )	2012-09-07 23:19:24 +02:00
Gael Guennebaud	16deb21376	forward resize() function from Array/Matrix-Wrapper to the nested expression such that mat.array().resize(a,b) is now allowed. (transplanted from `9da41cc527` )	2012-08-30 16:28:53 +02:00
Gael Guennebaud	221f54698c	Fix out-of-range memory access in GEMV (the memory was not used for the computation, only to assemble unaligned packets from aligned packet loads)	2012-08-30 10:52:15 +02:00
Gael Guennebaud	2858b6d2d6	fix bug #499 : the image was missing because of a dependency issue when building/executing the "special" examples (transplanted from `75435079ca` )	2012-08-27 11:11:25 +02:00
Gael Guennebaud	e589e3f0b6	simplify eigen-doc.tgz file generation, and make it more future proof (transplanted from `aa1aa36d6d` )	2012-08-27 10:56:44 +02:00
Gael Guennebaud	66e7d02533	remove EXTRACT_ALL (transplanted from `904c2e6cfb` )	2012-08-27 10:30:10 +02:00
Thomas Capricelli	5cde86fce8	add piwik code to documentation (web stats engine)	2012-08-21 22:39:47 +02:00
Thomas Capricelli	d0cb2b78d3	documentation script : the 3.1 branch is currently 'dox', not 'dox-devel'	2012-08-21 20:42:09 +02:00
jdh8	c0da31309d	merge	2012-08-18 21:10:42 +08:00
Jitse Niesen	66c3343238	Undo incorrect fix in previous commit, and fix real mistake instead. (transplanted from `dee866a99a` )	2012-08-17 15:36:37 +01:00
Jitse Niesen	0c078ca80a	Documentation fixes. Thanks to Rodney Sparapani for reporting these. (transplanted from `5eefca637e` )	2012-08-17 14:49:18 +01:00
jdh8	87e4b10747	merge	2012-08-08 17:47:59 +08:00
Gael Guennebaud	e2886d34ef	Fix precision regression when attempting to fix underflow issues. (transplanted from `af824091be` )	2012-08-05 09:57:31 +02:00
jdh8	246d12dcab	Fix some typos in MatrixLogarithm to improve accuracy.	2012-08-03 23:39:15 +08:00
Desire NUENTSA	1914024965	bug #493 : multiple calls to FindUmfPack	2012-08-02 10:00:23 +02:00
Gael Guennebaud	a03c970c5c	fix various regressions with MKL support (transplanted from `8ab0e16e27` )	2012-07-28 16:32:43 +02:00
Gael Guennebaud	9f945b6028	SparseMatrix: add missing ctor for ReturnByValue (transplanted from `7518201de8` )	2012-07-25 23:03:10 +02:00
Gael Guennebaud	4691a4e4b5	Fix aliasing issue in sparse matrix assignment. (m=-m; or m=m.transpose(); with m sparse work again) (transplanted from `e75b1eb883` )	2012-07-25 09:33:50 +02:00
Jitse Niesen	e546ee315a	Use EISPACK's strategy re max number of iters in Schur decomposition (bug #479 ).	2012-07-22 22:03:23 +01:00
Benoit Jacob	a63c4da68e	Added tag 3.1.1 for changeset `22415b3dbb`	2012-07-22 10:18:18 -04:00
Benoit Jacob	22415b3dbb	bump version number	2012-07-22 10:17:54 -04:00
Desire NUENTSA	b4065b5598	bug #479 : Adjust max iterations count wrt matrix size	2012-07-16 11:31:59 +02:00
Benoit Jacob	7273feee92	add COPYING.MINPACK	2012-07-15 11:46:22 -04:00
Benoit Jacob	3a0c40de29	MINPACK license is OK for MPL2 after all	2012-07-15 10:30:57 -04:00
Benoit Jacob	d1765d98a9	add COPYING.README	2012-07-15 10:29:09 -04:00
Benoit Jacob	de23f2a27e	add COPYING.MPL2	2012-07-15 10:20:59 -04:00
Benoit Jacob	48ea53288d	remove outdated "Eigen itself is part of the KDE project" outside of eigen2 files	2012-07-15 10:17:45 -04:00
Gael Guennebaud	b2d857af3e	document EIGEN_MPL2_ONLY (transplanted from `54559094ec` )	2012-07-14 09:56:03 +02:00
Gael Guennebaud	98f16a6ee7	fix bug #485 : conflict between a typedef and template type parameter (transplanted from `46b1c7a0ce` )	2012-07-13 20:54:38 +02:00
Benoit Jacob	0de22418e1	Add a EIGEN_MPL2_ONLY build option to generate compiler errors when including non-MPL2 modules	2012-07-13 14:42:47 -04:00
Benoit Jacob	6072cac80e	Manual MPL2 relicensing fixes	2012-07-13 14:42:47 -04:00
Benoit Jacob	132eb28e9c	Automatic relicensing to MPL2 using Keirs script. Manual fixup follows.	2012-07-13 14:42:47 -04:00
Keir Mierle	0bec280dd1	Add preliminary script to relicense Eigen to MPL2.	2012-07-11 11:29:52 -07:00
Gael Guennebaud	bff29c0af9	fix computation of fixed size sub/super diagonal size (transplanted from `3e6329a0d9` )	2012-07-10 22:39:05 +02:00
Gael Guennebaud	a1f7a87e1e	Fix possible underflow issues in SelfAdjointEigenSolver (transplanted from `a2c3003be2` )	2012-07-10 09:51:26 +02:00
Gael Guennebaud	689d64156c	fix compilation with MSVC	2012-07-05 21:58:01 +02:00
Gael Guennebaud	95e3e4f865	Fix bug #480 : workaround the Android NDK defining isfinite as a macro (transplanted from `5dbdde0420` )	2012-07-05 17:22:25 +02:00
Jitse Niesen	32e83f6000	doc: Typo in CustomizingEigen, introduced in previous commit. Thanks to Christoph Hertzberg for noting this. (transplanted from `60edf02f6f` )	2012-07-05 13:56:28 +01:00
Jitse Niesen	4a7609fa74	doc: Add constructor to example for inheritance. See "Error in Inheriting Eigen::Vector3d" on forum. (transplanted from `b582b2ebdc` )	2012-07-05 13:36:02 +01:00
Gael Guennebaud	dee3325ef5	fix bug #486 : template speacialization of member functions must be declared inline to avoid duplicate references (transplanted from `0a7ce6ad69` )	2012-07-05 13:32:23 +02:00
Gael Guennebaud	2c88252599	fix bug #487 : isometry * scaling was not compiling	2012-07-04 18:25:07 +02:00
Gael Guennebaud	7027b5a8f9	workaround compilation issue with MSVC 2005 (transplanted from `eee34f2da4` )	2012-07-02 10:20:44 +02:00
Gael Guennebaud	2d45ad75ea	fix implicit scalar conversion (transplanted from `139c91bf30` )	2012-06-28 13:12:49 +02:00
Gael Guennebaud	4328afc1a5	remove dynamic allocation for fixed size object and triangular matrix-matrix products (transplanted from `57b5804974` )	2012-06-26 17:45:01 +02:00
Gael Guennebaud	086e0aae51	Added tag 3.1.0 for changeset `dd3cd5455e`	2012-06-24 11:29:15 +02:00
Gael Guennebaud	dd3cd5455e	fix GMRES (transplanted from `e46fc8c05c` )	2012-06-23 19:29:21 +02:00
Gael Guennebaud	9c2cc0b243	create 3.1 branch and bump to 3.1.0	2012-06-22 09:26:23 +02:00