simplification (no reason anymore to write that in that convoluted way)

argh, forgot to re-add the throw()
* fix bugs in EigenTesting.cmake: it didn't work with -DEIGEN_NO_ASSERTION_CHECKING=ON
2026-04-10 11:34:33 +08:00 · 2009-05-15 16:05:45 +00:00 · 2009-05-15 15:54:52 +00:00 · 2009-05-15 15:53:26 +00:00 · 2009-05-13 02:02:22 +00:00 · 2009-05-12 13:43:40 +00:00
375 changed files with 31871 additions and 5740 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,67 +1,122 @@
-PROJECT(Eigen)
-SET(EIGEN_VERSION_NUMBER "2.0-beta1")
+# cmake_minimum_required() has to come before project(): it establishes the
+# policy defaults that project() and everything after it are evaluated under.
+cmake_minimum_required(VERSION 2.6.2)
+project(Eigen)
+set(EIGEN_VERSION_NUMBER "2.0.52-unstable")

 #if the svnversion program is absent, this will leave the SVN_REVISION string empty,
 #but won't stop CMake.
-EXECUTE_PROCESS(COMMAND svnversion -n ${CMAKE_SOURCE_DIR}
-                OUTPUT_VARIABLE EIGEN_SVN_REVISION)
+execute_process(COMMAND svnversion -n ${CMAKE_SOURCE_DIR}
+                OUTPUT_VARIABLE EIGEN_SVNVERSION_OUTPUT)

-IF(EIGEN_SVN_REVISION)
-SET(EIGEN_VERSION "${EIGEN_VERSION_NUMBER} (SVN revision ${EIGEN_SVN_REVISION})")
-ELSE(EIGEN_SVN_REVISION)
-SET(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}")
-ENDIF(EIGEN_SVN_REVISION)
+#we only want EIGEN_SVN_REVISION if it is an actual revision number, not a string like "exported"
+string(REGEX MATCH "^[0-9]+.*" EIGEN_SVN_REVISION "${EIGEN_SVNVERSION_OUTPUT}")

-SET(EIGEN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-SET(EIGEN_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+if(EIGEN_SVN_REVISION)
+  set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER} (SVN revision ${EIGEN_SVN_REVISION})")
+else(EIGEN_SVN_REVISION)
+  set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}")
+endif(EIGEN_SVN_REVISION)

-CMAKE_MINIMUM_REQUIRED(VERSION 2.4)
+
+include(CheckCXXCompilerFlag)

 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

-OPTION(BUILD_TESTS "Build tests" OFF)
-OPTION(BUILD_DEMOS "Build demos" OFF)
-OPTION(BUILD_LIB "Build the binary shared library" OFF)
-OPTION(BUILD_BTL "Build benchmark suite" OFF)
+option(EIGEN_BUILD_TESTS "Build tests" OFF)
+option(EIGEN_BUILD_DEMOS "Build demos" OFF)
+if(NOT WIN32)
+  option(EIGEN_BUILD_LIB "Build the binary shared library" OFF)
+endif(NOT WIN32)
+option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)

-IF(BUILD_LIB)
-  OPTION(TEST_LIB "Build the unit tests using the library (disable -pedantic)" OFF)
-ENDIF(BUILD_LIB)
+if(EIGEN_BUILD_LIB)
+  option(EIGEN_TEST_LIB "Build the unit tests using the library (disable -pedantic)" OFF)
+endif(EIGEN_BUILD_LIB)

-SET(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(CMAKE_INCLUDE_CURRENT_DIR ON)

-IF(CMAKE_COMPILER_IS_GNUCXX)
-  IF(CMAKE_SYSTEM_NAME MATCHES Linux)
-    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
-    IF(NOT TEST_LIB)
-      SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
-    ENDIF(NOT TEST_LIB)
-    IF(TEST_SSE2)
-      SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
-      MESSAGE("Enabling SSE2 in tests/examples")
-    ENDIF(TEST_SSE2)
-    IF(TEST_SSE3)
-      SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
-      MESSAGE("Enabling SSE3 in tests/examples")
-    ENDIF(TEST_SSE3)
-    IF(TEST_SSSE3)
-      SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
-      MESSAGE("Enabling SSSE3 in tests/examples")
-    ENDIF(TEST_SSSE3)
-    IF(TEST_ALTIVEC)
-      SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
-      MESSAGE("Enabling AltiVec in tests/examples")
-    ENDIF(TEST_ALTIVEC)
-  ENDIF(CMAKE_SYSTEM_NAME MATCHES Linux)
-ENDIF(CMAKE_COMPILER_IS_GNUCXX)
+if(CMAKE_COMPILER_IS_GNUCXX)
+  if(CMAKE_SYSTEM_NAME MATCHES Linux)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")

-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+    check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA)
+    if(COMPILER_SUPPORT_WEXTRA)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
+    endif()

-ADD_SUBDIRECTORY(Eigen)
-ADD_SUBDIRECTORY(test)
-ADD_SUBDIRECTORY(doc)
-ADD_SUBDIRECTORY(demos)
+    if(NOT EIGEN_TEST_LIB)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
+    endif(NOT EIGEN_TEST_LIB)

-IF(BUILD_BTL)
-  ADD_SUBDIRECTORY(bench/btl)
-ENDIF(BUILD_BTL)
+    option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
+    if(EIGEN_TEST_SSE2)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
+      message("Enabling SSE2 in tests/examples")
+    endif(EIGEN_TEST_SSE2)
+
+    option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
+    if(EIGEN_TEST_SSE3)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+      message("Enabling SSE3 in tests/examples")
+    endif(EIGEN_TEST_SSE3)
+
+    option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
+    if(EIGEN_TEST_SSSE3)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
+      message("Enabling SSSE3 in tests/examples")
+    endif(EIGEN_TEST_SSSE3)
+
+    option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
+    if(EIGEN_TEST_ALTIVEC)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
+      message("Enabling AltiVec in tests/examples")
+    endif(EIGEN_TEST_ALTIVEC)
+
+  endif(CMAKE_SYSTEM_NAME MATCHES Linux)
+endif(CMAKE_COMPILER_IS_GNUCXX)
+
+if(MSVC)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ")
+
+  option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
+  if(EIGEN_TEST_SSE2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2")
+    message("Enabling SSE2 in tests/examples")
+  endif(EIGEN_TEST_SSE2)
+endif(MSVC)
+
+option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
+if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
+  add_definitions(-DEIGEN_DONT_VECTORIZE=1)
+  message("Disabling vectorization in tests/examples")
+endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+
+set(INCLUDE_INSTALL_DIR
+    "${CMAKE_INSTALL_PREFIX}/include/eigen2"
+    CACHE PATH
+    "The directory where we install the header files"
+    FORCE)
+
+add_subdirectory(Eigen)
+
+add_subdirectory(doc)
+
+if(EIGEN_BUILD_TESTS)
+  include(CTest)
+  add_subdirectory(test)
+endif(EIGEN_BUILD_TESTS)
+
+add_subdirectory(unsupported)
+
+if(EIGEN_BUILD_DEMOS)
+  add_subdirectory(demos)
+endif(EIGEN_BUILD_DEMOS)
+
+if(EIGEN_BUILD_BTL)
+  add_subdirectory(bench/btl)
+endif(EIGEN_BUILD_BTL)
+
+if(EIGEN_BUILD_TESTS)
+  ei_testing_print_summary()
+endif(EIGEN_BUILD_TESTS)
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -0,0 +1,13 @@
+## This file should be placed in the root directory of your project.
+## Then modify the CMakeLists.txt file in the root directory of your
+## project to incorporate the testing dashboard.
+## # The following are required to use Dart and the CDash dashboard
+##   ENABLE_TESTING()
+##   INCLUDE(Dart)
+set(CTEST_PROJECT_NAME "Eigen")
+set(CTEST_NIGHTLY_START_TIME "05:00:00 UTC")
+
+set(CTEST_DROP_METHOD "http")
+set(CTEST_DROP_SITE "www.cdash.org")
+set(CTEST_DROP_LOCATION "/CDashPublic/submit.php?project=Eigen")
+set(CTEST_DROP_SITE_CDASH TRUE)
--- a/2
+++ b/2
@@ -5,7 +5,7 @@
 #---------------------------------------------------------------------------
 DOXYFILE_ENCODING      = UTF-8
 PROJECT_NAME           = Eigen
-PROJECT_NUMBER         = 2.0-alpha7
+PROJECT_NUMBER         = 2.0
 OUTPUT_DIRECTORY       = ./
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
--- a/Eigen/Array
+++ b/Eigen/Array
@@ -3,9 +3,11 @@

 #include "Core"

+#include "src/Core/util/DisableMSVCWarnings.h"
+
 namespace Eigen {

-/** \defgroup Array Array module
+/** \defgroup Array_Module Array module
  * This module provides several handy features to manipulate matrices as simple array of values.
  * In addition to listed classes, it defines various methods of the Cwise interface
  * (accessible from MatrixBase::cwise()), including:
@@ -14,8 +16,12 @@ namespace Eigen {
  *  - sin, cos, sqrt, pow, exp, log, square, cube, inverse (reciprocal).
  *
  * This module also provides various MatrixBase methods, including:
-  *  - \ref MatrixBase::all() "all", \ref MatrixBase::any() "any",
-  *  - \ref MatrixBase::Random() "random matrix initialization"
+  *  - boolean reductions: \ref MatrixBase::all() "all", \ref MatrixBase::any() "any", \ref MatrixBase::count() "count",
+  *  - \ref MatrixBase::Random() "random matrix initialization",
+  *  - a \ref MatrixBase::select() "select" function mimicking the ternary ?: operator,
+  *  - \ref MatrixBase::colwise() "column-wise" and \ref MatrixBase::rowwise() "row-wise" reductions,
+  *  - \ref MatrixBase::reverse() "matrix reverse",
+  *  - \ref MatrixBase::lpNorm() "generic matrix norm".
  *
  * \code
  * #include <Eigen/Array>
@@ -24,11 +30,16 @@ namespace Eigen {

 #include "src/Array/CwiseOperators.h"
 #include "src/Array/Functors.h"
-#include "src/Array/AllAndAny.h"
+#include "src/Array/BooleanRedux.h"
 #include "src/Array/Select.h"
 #include "src/Array/PartialRedux.h"
 #include "src/Array/Random.h"
+#include "src/Array/Norms.h"
+#include "src/Array/Replicate.h"
+#include "src/Array/Reverse.h"

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_ARRAY_MODULE_H
--- a/Eigen/CMakeLists.txt
+++ b/Eigen/CMakeLists.txt
@@ -1,34 +1,28 @@
-SET(Eigen_HEADERS Core LU Cholesky QR Geometry Sparse Array SVD Regression)
+set(Eigen_HEADERS Core LU Cholesky QR Geometry Sparse Array SVD LeastSquares QtAlignedMalloc StdVector)

-IF(BUILD_LIB)
-    SET(Eigen_SRCS
+if(EIGEN_BUILD_LIB)
+    set(Eigen_SRCS
      src/Core/CoreInstantiations.cpp
      src/Cholesky/CholeskyInstantiations.cpp
      src/QR/QrInstantiations.cpp
    )

-    ADD_LIBRARY(Eigen2 SHARED ${Eigen_SRCS})
+    add_library(Eigen2 SHARED ${Eigen_SRCS})

-    INSTALL(TARGETS Eigen2
+    install(TARGETS Eigen2
            RUNTIME DESTINATION bin
            LIBRARY DESTINATION lib
            ARCHIVE DESTINATION lib)
-ENDIF(BUILD_LIB)
+endif(EIGEN_BUILD_LIB)

-IF(CMAKE_COMPILER_IS_GNUCXX)
-    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g1 -O2")
-    SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g1 -O2")
-ENDIF(CMAKE_COMPILER_IS_GNUCXX)
+if(CMAKE_COMPILER_IS_GNUCXX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g1 -O2")
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g1 -O2")
+endif(CMAKE_COMPILER_IS_GNUCXX)

-SET(INCLUDE_INSTALL_DIR
-    "${CMAKE_INSTALL_PREFIX}/include/eigen2"
-    CACHE PATH
-    "The directory where we install the header files"
-    FORCE)
-
-INSTALL(FILES
+install(FILES
  ${Eigen_HEADERS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel
  )

-ADD_SUBDIRECTORY(src)
+add_subdirectory(src)
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -3,6 +3,8 @@

 #include "Core"

+#include "src/Core/util/DisableMSVCWarnings.h"
+
 // Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module
 #if (defined EIGEN_EXTERN_INSTANTIATIONS) && (EIGEN_EXTERN_INSTANTIATIONS>=2)
  #ifndef EIGEN_HIDE_HEAVY_CODE
@@ -15,10 +17,13 @@
 namespace Eigen {

 /** \defgroup Cholesky_Module Cholesky module
+  *
+  * \nonstableyet
+  *
  * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
  * Those decompositions are accessible via the following MatrixBase methods:
-  *  - MatrixBase::cholesky(),
-  *  - MatrixBase::choleskyNoSqrt()
+  *  - MatrixBase::llt(),
+  *  - MatrixBase::ldlt()
  *
  * \code
  * #include <Eigen/Cholesky>
@@ -27,14 +32,14 @@ namespace Eigen {

 #include "src/Array/CwiseOperators.h"
 #include "src/Array/Functors.h"
-#include "src/Cholesky/Cholesky.h"
-#include "src/Cholesky/CholeskyWithoutSquareRoot.h"
+#include "src/Cholesky/LLT.h"
+#include "src/Cholesky/LDLT.h"

 } // namespace Eigen

 #define EIGEN_CHOLESKY_MODULE_INSTANTIATE_TYPE(MATRIXTYPE,PREFIX) \
-  PREFIX template class Cholesky<MATRIXTYPE>; \
-  PREFIX template class CholeskyWithoutSquareRoot<MATRIXTYPE>
+  PREFIX template class LLT<MATRIXTYPE>; \
+  PREFIX template class LDLT<MATRIXTYPE>

 #define EIGEN_CHOLESKY_MODULE_INSTANTIATE(PREFIX) \
  EIGEN_CHOLESKY_MODULE_INSTANTIATE_TYPE(Matrix2f,PREFIX); \
@@ -55,4 +60,6 @@ namespace Eigen {
 } // namespace Eigen
 #endif

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_CHOLESKY_MODULE_H
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -1,18 +1,62 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
 #ifndef EIGEN_CORE_H
 #define EIGEN_CORE_H

+// first thing Eigen does: prevent MSVC from committing suicide
+#include "src/Core/util/DisableMSVCWarnings.h"
+
 #ifdef _MSC_VER
-#pragma warning( disable : 4181 4244 )
+  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
+  #if (_MSC_VER >= 1500) // 2008 or later
+    // Remember that usage of defined() in a #define is undefined by the standard.
+    // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
+    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
+      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
+    #endif
+  #endif
 #endif

 #ifdef __GNUC__
-#define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__>=x && __GNUC_MINOR__>=y) || __GNUC__>x)
+  #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__>=x && __GNUC_MINOR__>=y) || __GNUC__>x)
 #else
-#define EIGEN_GNUC_AT_LEAST(x,y) 0
+  #define EIGEN_GNUC_AT_LEAST(x,y) 0
+#endif
+
+// Remember that usage of defined() in a #define is undefined by the standard
+#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
+  #define EIGEN_SSE2_BUT_NOT_OLD_GCC
+#endif
+
+#ifdef EIGEN_DONT_ALIGN
+  #define EIGEN_DONT_VECTORIZE
 #endif

 #ifndef EIGEN_DONT_VECTORIZE
-  #if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
+  #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_SSE
    #include <emmintrin.h>
@@ -23,11 +67,11 @@
    #ifdef __SSSE3__
      #include <tmmintrin.h>
    #endif
-  #elif (defined __ALTIVEC__)
+  #elif defined __ALTIVEC__
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_ALTIVEC
    #include <altivec.h>
-    // We _need_ to #undef all these ugly tokens defined in <altivec.h>
+    // We need to #undef all these ugly tokens defined in <altivec.h>
    // => use __vector instead of vector
    #undef bool
    #undef vector
@@ -43,6 +87,22 @@
 #include <iostream>
 #include <cstring>
 #include <string>
+#include <limits>
+
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS)
+  #define EIGEN_EXCEPTIONS
+#endif
+
+#ifdef EIGEN_EXCEPTIONS
+  #include <new>
+#endif
+
+// this needs to be done after all possible windows C header includes and before any Eigen source includes
+// (system C++ includes are supposed to be able to deal with this already):
+// windows.h defines min and max macros which would make Eigen fail to compile.
+#if defined(min) || defined(max)
+#error The preprocessor symbols 'min' or 'max' are defined. If you are compiling on Windows, do #define NOMINMAX to prevent windows.h from defining these symbols.
+#endif

 namespace Eigen {

@@ -69,9 +129,10 @@ namespace Eigen {
 #include "src/Core/GenericPacketMath.h"

 #if defined EIGEN_VECTORIZE_SSE
-#include "src/Core/arch/SSE/PacketMath.h"
+  #include "src/Core/arch/SSE/PacketMath.h"
+  #include "src/Core/arch/SSE/MathFunctions.h"
 #elif defined EIGEN_VECTORIZE_ALTIVEC
-#include "src/Core/arch/AltiVec/PacketMath.h"
+  #include "src/Core/arch/AltiVec/PacketMath.h"
 #endif

 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
@@ -81,12 +142,15 @@ namespace Eigen {
 #include "src/Core/Functors.h"
 #include "src/Core/MatrixBase.h"
 #include "src/Core/Coeffs.h"
+
 #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
                                // at least confirmed with Doxygen 1.5.5 and 1.5.6
-#include "src/Core/Assign.h"
+  #include "src/Core/Assign.h"
 #endif
+
 #include "src/Core/MatrixStorage.h"
 #include "src/Core/NestByValue.h"
+#include "src/Core/ReturnByValue.h"
 #include "src/Core/Flagged.h"
 #include "src/Core/Matrix.h"
 #include "src/Core/Cwise.h"
@@ -94,7 +158,6 @@ namespace Eigen {
 #include "src/Core/CwiseUnaryOp.h"
 #include "src/Core/CwiseNullaryOp.h"
 #include "src/Core/Dot.h"
-#include "src/Core/Product.h"
 #include "src/Core/DiagonalProduct.h"
 #include "src/Core/SolveTriangular.h"
 #include "src/Core/MapBase.h"
@@ -103,8 +166,7 @@ namespace Eigen {
 #include "src/Core/Minor.h"
 #include "src/Core/Transpose.h"
 #include "src/Core/DiagonalMatrix.h"
-#include "src/Core/DiagonalCoeffs.h"
-#include "src/Core/Sum.h"
+#include "src/Core/Diagonal.h"
 #include "src/Core/Redux.h"
 #include "src/Core/Visitor.h"
 #include "src/Core/Fuzzy.h"
@@ -112,8 +174,13 @@ namespace Eigen {
 #include "src/Core/Swap.h"
 #include "src/Core/CommaInitializer.h"
 #include "src/Core/Part.h"
-#include "src/Core/CacheFriendlyProduct.h"
+#include "src/Core/Product.h"
+#include "src/Core/products/GeneralMatrixMatrix.h"
+#include "src/Core/products/GeneralMatrixVector.h"
+#include "src/Core/products/SelfadjointMatrixVector.h"

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_CORE_H
--- a/Eigen/Geometry
+++ b/Eigen/Geometry
@@ -1,7 +1,12 @@
 #ifndef EIGEN_GEOMETRY_MODULE_H
 #define EIGEN_GEOMETRY_MODULE_H

+#include "Core"
+
+#include "src/Core/util/DisableMSVCWarnings.h"
+
 #include "Array"
+#include <limits>

 #ifndef M_PI
 #define M_PI 3.14159265358979323846
@@ -9,7 +14,10 @@

 namespace Eigen {

-/** \defgroup GeometryModule Geometry module
+/** \defgroup Geometry_Module Geometry module
+  *
+  * \nonstableyet
+  *
  * This module provides support for:
  *  - fixed-size homogeneous transformations
  *  - translation, scaling, 2D and 3D rotations
@@ -24,16 +32,25 @@ namespace Eigen {
  */

 #include "src/Geometry/OrthoMethods.h"
+#include "src/Geometry/Homogeneous.h"
 #include "src/Geometry/RotationBase.h"
 #include "src/Geometry/Rotation2D.h"
 #include "src/Geometry/Quaternion.h"
 #include "src/Geometry/AngleAxis.h"
+#include "src/Geometry/EulerAngles.h"
 #include "src/Geometry/Transform.h"
 #include "src/Geometry/Translation.h"
 #include "src/Geometry/Scaling.h"
 #include "src/Geometry/Hyperplane.h"
 #include "src/Geometry/ParametrizedLine.h"
+#include "src/Geometry/AlignedBox.h"
+
+#if defined EIGEN_VECTORIZE_SSE
+  #include "src/Geometry/arch/Geometry_SSE.h"
+#endif

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_GEOMETRY_MODULE_H
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -3,6 +3,8 @@

 #include "Core"

+#include "src/Core/util/DisableMSVCWarnings.h"
+
 namespace Eigen {

 /** \defgroup LU_Module LU module
@@ -17,9 +19,12 @@ namespace Eigen {
  */

 #include "src/LU/LU.h"
+#include "src/LU/PartialLU.h"
 #include "src/LU/Determinant.h"
 #include "src/LU/Inverse.h"

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_LU_MODULE_H
--- a/Eigen/LeastSquares
+++ b/Eigen/LeastSquares
@@ -1,22 +1,27 @@
 #ifndef EIGEN_REGRESSION_MODULE_H
 #define EIGEN_REGRESSION_MODULE_H

-#include "LU"
+#include "Core"
+
+#include "src/Core/util/DisableMSVCWarnings.h"
+
 #include "QR"
 #include "Geometry"

 namespace Eigen {

-/** \defgroup Regression_Module Regression module
+/** \defgroup LeastSquares_Module LeastSquares module
  * This module provides linear regression and related features.
  *
  * \code
-  * #include <Eigen/Regression>
+  * #include <Eigen/LeastSquares>
  * \endcode
  */

-#include "src/Regression/Regression.h"
+#include "src/LeastSquares/LeastSquares.h"

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_REGRESSION_MODULE_H
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -2,6 +2,9 @@
 #define EIGEN_QR_MODULE_H

 #include "Core"
+
+#include "src/Core/util/DisableMSVCWarnings.h"
+
 #include "Cholesky"

 // Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module
@@ -16,6 +19,9 @@
 namespace Eigen {

 /** \defgroup QR_Module QR module
+  *
+  * \nonstableyet
+  *
  * This module mainly provides QR decomposition and an eigen value solver.
  * This module also provides some MatrixBase methods, including:
  *  - MatrixBase::qr(),
@@ -62,4 +68,6 @@ namespace Eigen {

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_QR_MODULE_H
--- a/Eigen/QtAlignedMalloc
+++ b/Eigen/QtAlignedMalloc
@@ -0,0 +1,29 @@
+
+#ifndef EIGEN_QTMALLOC_MODULE_H
+#define EIGEN_QTMALLOC_MODULE_H
+
+#include "Core"
+
+#if (!EIGEN_MALLOC_ALREADY_ALIGNED)
+
+inline void *qMalloc(size_t size)
+{
+  return Eigen::ei_aligned_malloc(size);
+}
+
+inline void qFree(void *ptr)
+{
+  Eigen::ei_aligned_free(ptr);
+}
+
+// Replacement for Qt's qRealloc built on Eigen's aligned allocation functions:
+// allocates a new aligned block, copies the old contents into it and releases
+// the old block. Returns the new block.
+// NOTE(review): the size of the old block is not known here, so `size` bytes are
+// copied from `ptr`; when the block grows this reads past the end of the old one.
+inline void *qRealloc(void *ptr, size_t size)
+{
+  void* newPtr = Eigen::ei_aligned_malloc(size);
+  // A null `ptr` means there is nothing to copy or release (plain allocation), and
+  // a failed allocation must leave the old block untouched, as realloc() does.
+  if (ptr != 0 && newPtr != 0)
+  {
+    memcpy(newPtr, ptr, size);
+    Eigen::ei_aligned_free(ptr);
+  }
+  return newPtr;
+}
+
+#endif
+
+#endif // EIGEN_QTMALLOC_MODULE_H
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -3,9 +3,14 @@

 #include "Core"

+#include "src/Core/util/DisableMSVCWarnings.h"
+
 namespace Eigen {

 /** \defgroup SVD_Module SVD module
+  *
+  * \nonstableyet
+  *
  * This module provides SVD decomposition for (currently) real matrices.
  * This decomposition is accessible via the following MatrixBase method:
  *  - MatrixBase::svd()
@@ -19,4 +24,6 @@ namespace Eigen {

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_SVD_MODULE_H
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -2,26 +2,132 @@
 #define EIGEN_SPARSE_MODULE_H

 #include "Core"
+
+#include "src/Core/util/DisableMSVCWarnings.h"
+
 #include <vector>
 #include <map>
 #include <cstdlib>
 #include <cstring>
 #include <algorithm>

+#ifdef EIGEN_GOOGLEHASH_SUPPORT
+  #include <google/dense_hash_map>
+#endif
+
+#ifdef EIGEN_CHOLMOD_SUPPORT
+  extern "C" {
+    #include "cholmod.h"
+  }
+#endif
+
+#ifdef EIGEN_TAUCS_SUPPORT
+  // taucs.h declares a lot of mess
+  #define isnan
+  #define finite
+  #define isinf
+  extern "C" {
+    #include "taucs.h"
+  }
+  #undef isnan
+  #undef finite
+  #undef isinf
+
+  #ifdef min
+    #undef min
+  #endif
+  #ifdef max
+    #undef max
+  #endif
+  #ifdef complex
+    #undef complex
+  #endif
+#endif
+
+#ifdef EIGEN_SUPERLU_SUPPORT
+  typedef int int_t;
+  #include "slu_Cnames.h"
+  #include "supermatrix.h"
+  #include "slu_util.h"
+
+  namespace SuperLU_S {
+  #include "slu_sdefs.h"
+  }
+  namespace SuperLU_D {
+  #include "slu_ddefs.h"
+  }
+  namespace SuperLU_C {
+  #include "slu_cdefs.h"
+  }
+  namespace SuperLU_Z {
+  #include "slu_zdefs.h"
+  }
+  namespace Eigen { struct SluMatrix; }
+#endif
+
+#ifdef EIGEN_UMFPACK_SUPPORT
+  #include "umfpack.h"
+#endif
+
 namespace Eigen {

+/** \defgroup Sparse_Module Sparse module
+  *
+  * \nonstableyet
+  *
+  * See the \ref TutorialSparse "Sparse tutorial"
+  *
+  * \code
+  * #include <Eigen/Sparse>
+  * \endcode
+  */
+
 #include "src/Sparse/SparseUtil.h"
 #include "src/Sparse/SparseMatrixBase.h"
-#include "src/Sparse/SparseArray.h"
+#include "src/Sparse/CompressedStorage.h"
+#include "src/Sparse/AmbiVector.h"
+#include "src/Sparse/RandomSetter.h"
 #include "src/Sparse/SparseBlock.h"
 #include "src/Sparse/SparseMatrix.h"
-#include "src/Sparse/HashMatrix.h"
-#include "src/Sparse/LinkedVectorMatrix.h"
+#include "src/Sparse/DynamicSparseMatrix.h"
+#include "src/Sparse/MappedSparseMatrix.h"
+#include "src/Sparse/SparseVector.h"
 #include "src/Sparse/CoreIterators.h"
-#include "src/Sparse/SparseSetter.h"
+#include "src/Sparse/SparseTranspose.h"
+#include "src/Sparse/SparseCwise.h"
+#include "src/Sparse/SparseCwiseUnaryOp.h"
+#include "src/Sparse/SparseCwiseBinaryOp.h"
+#include "src/Sparse/SparseDot.h"
+#include "src/Sparse/SparseAssign.h"
+#include "src/Sparse/SparseRedux.h"
+#include "src/Sparse/SparseFuzzy.h"
+#include "src/Sparse/SparseFlagged.h"
 #include "src/Sparse/SparseProduct.h"
+#include "src/Sparse/SparseDiagonalProduct.h"
+#include "src/Sparse/SparseTriangular.h"
 #include "src/Sparse/TriangularSolver.h"
+#include "src/Sparse/SparseLLT.h"
+#include "src/Sparse/SparseLDLT.h"
+#include "src/Sparse/SparseLU.h"
+
+#ifdef EIGEN_CHOLMOD_SUPPORT
+# include "src/Sparse/CholmodSupport.h"
+#endif
+
+#ifdef EIGEN_TAUCS_SUPPORT
+# include "src/Sparse/TaucsSupport.h"
+#endif
+
+#ifdef EIGEN_SUPERLU_SUPPORT
+# include "src/Sparse/SuperLUSupport.h"
+#endif
+
+#ifdef EIGEN_UMFPACK_SUPPORT
+# include "src/Sparse/UmfPackSupport.h"
+#endif

 } // namespace Eigen

+#include "src/Core/util/EnableMSVCWarnings.h"
+
 #endif // EIGEN_SPARSE_MODULE_H
--- a/Eigen/StdVector
+++ b/Eigen/StdVector
@@ -0,0 +1,167 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_STDVECTOR_MODULE_H
+#define EIGEN_STDVECTOR_MODULE_H
+
+#include "Core"
+#include <vector>
+
+namespace Eigen {
+
+// This one is needed to prevent reimplementing the whole std::vector.
+template <class T>
+class aligned_allocator_indirection : public aligned_allocator<T>
+{
+public:
+  typedef size_t    size_type;
+  typedef ptrdiff_t difference_type;
+  typedef T*        pointer;
+  typedef const T*  const_pointer;
+  typedef T&        reference;
+  typedef const T&  const_reference;
+  typedef T         value_type;
+
+  template<class U>
+  struct rebind
+  {
+    typedef aligned_allocator_indirection<U> other;
+  };
+
+  aligned_allocator_indirection() throw() {}
+  aligned_allocator_indirection(const aligned_allocator_indirection& ) throw() : aligned_allocator<T>() {}
+  aligned_allocator_indirection(const aligned_allocator<T>& ) throw() {}
+  template<class U>
+  aligned_allocator_indirection(const aligned_allocator_indirection<U>& ) throw() {}
+  template<class U>
+  aligned_allocator_indirection(const aligned_allocator<U>& ) throw() {}
+  ~aligned_allocator_indirection() throw() {}
+};
+
+#ifdef _MSC_VER
+
+  // Sometimes MSVC detects, at compile time, that the argument x
+  // in std::vector::resize(size_t s,T x) won't be aligned and generates an error
+  // even if this function is never called. Hence this little wrapper.
+  #define EIGEN_WORKAROUND_MSVC_STD_VECTOR(T) Eigen::ei_workaround_msvc_std_vector<T>
+  template<typename T> struct ei_workaround_msvc_std_vector : public T
+  {
+    inline ei_workaround_msvc_std_vector() : T() {}
+    inline ei_workaround_msvc_std_vector(const T& other) : T(other) {}
+    inline operator T& () { return *static_cast<T*>(this); }
+    inline operator const T& () const { return *static_cast<const T*>(this); }
+    template<typename OtherT>
+    inline T& operator=(const OtherT& other)
+    { T::operator=(other); return *this; }
+    inline ei_workaround_msvc_std_vector& operator=(const ei_workaround_msvc_std_vector& other)
+    { T::operator=(other); return *this; }
+  };
+
+#else
+
+  #define EIGEN_WORKAROUND_MSVC_STD_VECTOR(T) T
+
+#endif
+
+}
+
+namespace std {
+
+#define EIGEN_STD_VECTOR_SPECIALIZATION_BODY \
+  public:  \
+    typedef T value_type; \
+    typedef typename vector_base::allocator_type allocator_type; \
+    typedef typename vector_base::size_type size_type;  \
+    typedef typename vector_base::iterator iterator;  \
+    typedef typename vector_base::const_iterator const_iterator;  \
+    explicit vector(const allocator_type& a = allocator_type()) : vector_base(a) {}  \
+    template<typename InputIterator> \
+    vector(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \
+    : vector_base(first, last, a) {} \
+    vector(const vector& c) : vector_base(c) {}  \
+    explicit vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \
+    vector(iterator start, iterator end) : vector_base(start, end) {}  \
+    vector& operator=(const vector& x) {  \
+      vector_base::operator=(x);  \
+      return *this;  \
+    }
+
+template<typename T>
+class vector<T,Eigen::aligned_allocator<T> >
+  : public vector<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T),
+                  Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T)> >
+{
+  typedef vector<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T),
+                 Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T)> > vector_base;
+  EIGEN_STD_VECTOR_SPECIALIZATION_BODY
+
+  void resize(size_type new_size)
+  { resize(new_size, T()); }
+
+#if defined(_VECTOR_)
+  // workaround MSVC std::vector implementation
+  void resize(size_type new_size, const value_type& x)
+  {
+    if (vector_base::size() < new_size)
+      vector_base::_Insert_n(vector_base::end(), new_size - vector_base::size(), x);
+    else if (new_size < vector_base::size())
+      vector_base::erase(vector_base::begin() + new_size, vector_base::end());
+  }
+  void push_back(const value_type& x)
+  { vector_base::push_back(x); } 
+  using vector_base::insert;  
+  iterator insert(const_iterator position, const value_type& x)
+  { return vector_base::insert(position,x); }
+  void insert(const_iterator position, size_type new_size, const value_type& x)
+  { vector_base::insert(position, new_size, x); }
+#elif defined(_GLIBCXX_VECTOR) && EIGEN_GNUC_AT_LEAST(4,1)
+  // workaround GCC std::vector implementation
+  // Note that before gcc-4.1 we already have: std::vector::resize(size_type,const T&),
+  // so there is no need for a workaround!
+  void resize(size_type new_size, const value_type& x)
+  {
+    if (new_size < vector_base::size())
+      vector_base::_M_erase_at_end(this->_M_impl._M_start + new_size);
+    else
+      vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);
+  }
+#elif defined(_GLIBCXX_VECTOR)
+  using vector_base::resize;
+#else
+  // default implementation which should always work.
+  void resize(size_type new_size, const value_type& x)
+  {
+    if (new_size < vector_base::size())
+      vector_base::erase(vector_base::begin() + new_size, vector_base::end());
+    else if (new_size > vector_base::size())
+      vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);
+  }
+#endif
+
+};
+
+}
+
+#endif // EIGEN_STDVECTOR_MODULE_H
--- a/Eigen/src/Array/BooleanRedux.h
+++ b/Eigen/src/Array/BooleanRedux.h
@@ -89,7 +89,7 @@ struct ei_any_unroller<Derived, Dynamic>
  * \sa MatrixBase::any(), Cwise::operator<()
  */
 template<typename Derived>
-inline bool MatrixBase<Derived>::all(void) const
+inline bool MatrixBase<Derived>::all() const
 {
  const bool unroll = SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost)
                      <= EIGEN_UNROLLING_LIMIT;
@@ -99,8 +99,8 @@ inline bool MatrixBase<Derived>::all(void) const
     >::run(derived());
  else
  {
-    for(int j = 0; j < cols(); j++)
-      for(int i = 0; i < rows(); i++)
+    for(int j = 0; j < cols(); ++j)
+      for(int i = 0; i < rows(); ++i)
        if (!coeff(i, j)) return false;
    return true;
  }
@@ -113,7 +113,7 @@ inline bool MatrixBase<Derived>::all(void) const
  * \sa MatrixBase::all()
  */
 template<typename Derived>
-inline bool MatrixBase<Derived>::any(void) const
+inline bool MatrixBase<Derived>::any() const
 {
  const bool unroll = SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost)
                      <= EIGEN_UNROLLING_LIMIT;
@@ -123,11 +123,23 @@ inline bool MatrixBase<Derived>::any(void) const
           >::run(derived());
  else
  {
-    for(int j = 0; j < cols(); j++)
-      for(int i = 0; i < rows(); i++)
+    for(int j = 0; j < cols(); ++j)
+      for(int i = 0; i < rows(); ++i)
        if (coeff(i, j)) return true;
    return false;
  }
 }

+/** \array_module
+  *
+  * \returns the number of coefficients which evaluate to true
+  *
+  * \sa MatrixBase::all(), MatrixBase::any()
+  */
+template<typename Derived>
+inline int MatrixBase<Derived>::count() const
+{
+  return this->template cast<bool>().template cast<int>().sum();
+}
+
 #endif // EIGEN_ALLANDANY_H
--- a/Eigen/src/Array/CMakeLists.txt
+++ b/Eigen/src/Array/CMakeLists.txt
@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Array_SRCS "*.h")

 INSTALL(FILES
  ${Eigen_Array_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Array
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Array COMPONENT Devel
  )
--- a/Eigen/src/Array/CwiseOperators.h
+++ b/Eigen/src/Array/CwiseOperators.h
@@ -82,7 +82,7 @@ Cwise<ExpressionType>::log() const
  * Example: \include Cwise_cos.cpp
  * Output: \verbinclude Cwise_cos.out
  *
-  * \sa sin(), exp()
+  * \sa sin(), exp(), EIGEN_FAST_MATH
  */
 template<typename ExpressionType>
 inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_cos_op)
@@ -99,7 +99,7 @@ Cwise<ExpressionType>::cos() const
  * Example: \include Cwise_sin.cpp
  * Output: \verbinclude Cwise_sin.out
  *
-  * \sa cos(), exp()
+  * \sa cos(), exp(), EIGEN_FAST_MATH
  */
 template<typename ExpressionType>
 inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_sin_op)
--- a/Eigen/src/Array/Functors.h
+++ b/Eigen/src/Array/Functors.h
@@ -58,10 +58,16 @@ struct ei_functor_traits<ei_scalar_add_op<Scalar> >
  */
 template<typename Scalar> struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT {
  inline const Scalar operator() (const Scalar& a) const { return ei_sqrt(a); }
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return ei_psqrt(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+{ enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = ei_packet_traits<Scalar>::HasSqrt
+  };
+};

 /** \internal
  *
@@ -73,10 +79,12 @@ struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
  */
 template<typename Scalar> struct ei_scalar_exp_op EIGEN_EMPTY_STRUCT {
  inline const Scalar operator() (const Scalar& a) const { return ei_exp(a); }
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return ei_pexp(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_exp_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasExp }; };

 /** \internal
  *
@@ -88,10 +96,12 @@ struct ei_functor_traits<ei_scalar_exp_op<Scalar> >
  */
 template<typename Scalar> struct ei_scalar_log_op EIGEN_EMPTY_STRUCT {
  inline const Scalar operator() (const Scalar& a) const { return ei_log(a); }
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return ei_plog(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_log_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasLog }; };

 /** \internal
  *
@@ -102,11 +112,18 @@ struct ei_functor_traits<ei_scalar_log_op<Scalar> >
  * \sa class CwiseUnaryOp, Cwise::cos()
  */
 template<typename Scalar> struct ei_scalar_cos_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a) const { return ei_cos(a); }
+  inline Scalar operator() (const Scalar& a) const { return ei_cos(a); }
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return ei_pcos(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_cos_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = ei_packet_traits<Scalar>::HasCos && EIGEN_FAST_MATH
+  };
+};

 /** \internal
  *
@@ -118,10 +135,17 @@ struct ei_functor_traits<ei_scalar_cos_op<Scalar> >
  */
 template<typename Scalar> struct ei_scalar_sin_op EIGEN_EMPTY_STRUCT {
  inline const Scalar operator() (const Scalar& a) const { return ei_sin(a); }
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  inline Packet packetOp(const Packet& a) const { return ei_psin(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_sin_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = ei_packet_traits<Scalar>::HasSin && EIGEN_FAST_MATH
+  };
+};

 /** \internal
  *
@@ -200,7 +224,6 @@ template<typename Scalar>
 struct ei_functor_traits<ei_scalar_cube_op<Scalar> >
 { enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = int(ei_packet_traits<Scalar>::size)>1 }; };

-
 // default ei_functor_traits for STL functors:

 template<typename T>
--- a/Eigen/src/Array/Norms.h
+++ b/Eigen/src/Array/Norms.h
@@ -0,0 +1,80 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_ARRAY_NORMS_H
+#define EIGEN_ARRAY_NORMS_H
+
+template<typename Derived, int p>
+struct ei_lpNorm_selector
+{
+  typedef typename NumTraits<typename ei_traits<Derived>::Scalar>::Real RealScalar;
+  inline static RealScalar run(const MatrixBase<Derived>& m)
+  {
+    return ei_pow(m.cwise().abs().cwise().pow(p).sum(), RealScalar(1)/p);
+  }
+};
+
+template<typename Derived>
+struct ei_lpNorm_selector<Derived, 1>
+{
+  inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  {
+    return m.cwise().abs().sum();
+  }
+};
+
+template<typename Derived>
+struct ei_lpNorm_selector<Derived, 2>
+{
+  inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  {
+    return m.norm();
+  }
+};
+
+template<typename Derived>
+struct ei_lpNorm_selector<Derived, Infinity>
+{
+  inline static typename NumTraits<typename ei_traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  {
+    return m.cwise().abs().maxCoeff();
+  }
+};
+
+/** \array_module
+  * 
+  * \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
+  *          of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
+  *          norm, that is the maximum of the absolute values of the coefficients of *this.
+  *
+  * \sa norm()
+  */
+template<typename Derived>
+template<int p>
+inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::lpNorm() const
+{
+  return ei_lpNorm_selector<Derived, p>::run(*this);
+}
+
+#endif // EIGEN_ARRAY_NORMS_H
--- a/Eigen/src/Array/PartialRedux.h
+++ b/Eigen/src/Array/PartialRedux.h
@@ -1,8 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,7 +26,7 @@
 #ifndef EIGEN_PARTIAL_REDUX_H
 #define EIGEN_PARTIAL_REDUX_H

-/** \array_module \ingroup Array
+/** \array_module \ingroup Array_Module
  *
  * \class PartialReduxExpr
  *
@@ -61,7 +61,11 @@ struct ei_traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
    Flags = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
    TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime
  };
+  #if EIGEN_GNUC_AT_LEAST(3,4)
  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
+  #else
+  typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
+  #endif
  enum {
    CoeffReadCost = TraversalSize * ei_traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
  };
@@ -104,16 +108,19 @@ class PartialReduxExpr : ei_no_assignment_operator,
    { enum { value = COST }; };                                     \
    template<typename Derived>                                      \
    inline ResultType operator()(const MatrixBase<Derived>& mat) const     \
-    { return mat.MEMBER(); }                                        \
+    { return mat.MEMBER(); } \
  }

-EIGEN_MEMBER_FUNCTOR(norm2, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
+

 /** \internal */
 template <typename BinaryOp, typename Scalar>
@@ -130,7 +137,7 @@ struct ei_member_redux {
  const BinaryOp m_functor;
 };

-/** \array_module \ingroup Array
+/** \array_module \ingroup Array_Module
  *
  * \class PartialRedux
  *
@@ -204,11 +211,11 @@ template<typename ExpressionType, int Direction> class PartialRedux
    /** \returns a row (or column) vector expression of the squared norm
      * of each column (or row) of the referenced expression.
      *
-      * Example: \include PartialRedux_norm2.cpp
-      * Output: \verbinclude PartialRedux_norm2.out
+      * Example: \include PartialRedux_squaredNorm.cpp
+      * Output: \verbinclude PartialRedux_squaredNorm.out
      *
-      * \sa MatrixBase::norm2() */
-    const typename ReturnType<ei_member_norm2>::Type norm2() const
+      * \sa MatrixBase::squaredNorm() */
+    const typename ReturnType<ei_member_squaredNorm>::Type squaredNorm() const
    { return _expression(); }

    /** \returns a row (or column) vector expression of the norm
@@ -245,6 +252,78 @@ template<typename ExpressionType, int Direction> class PartialRedux
    const typename ReturnType<ei_member_any>::Type any() const
    { return _expression(); }

+    /** \returns a row (or column) vector expression representing
+      * the number of \c true coefficients of each respective column (or row).
+      *
+      * Example: \include PartialRedux_count.cpp
+      * Output: \verbinclude PartialRedux_count.out
+      *
+      * \sa MatrixBase::count() */
+    const PartialReduxExpr<ExpressionType, ei_member_count<int>, Direction> count() const
+    { return _expression(); }
+
+    /** \returns a row (or column) vector expression of the product
+      * of each column (or row) of the referenced expression.
+      *
+      * Example: \include PartialRedux_prod.cpp
+      * Output: \verbinclude PartialRedux_prod.out
+      *
+      * \sa MatrixBase::prod() */
+    const typename ReturnType<ei_member_prod>::Type prod() const
+    { return _expression(); }
+
+
+    /** \returns a matrix expression
+      * where each column (or row) is reversed.
+      *
+      * Example: \include PartialRedux_reverse.cpp
+      * Output: \verbinclude PartialRedux_reverse.out
+      *
+      * \sa MatrixBase::reverse() */
+    const Reverse<ExpressionType, Direction> reverse() const
+    {
+      return Reverse<ExpressionType, Direction>( _expression() );
+    }
+
+    const Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
+    replicate(int factor) const;
+
+    template<int Factor>
+    const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)>
+    replicate(int factor = Factor) const;
+
+/////////// Geometry module ///////////
+
+    const Homogeneous<ExpressionType,Direction> homogeneous() const;
+
+    typedef typename ExpressionType::PlainMatrixType CrossReturnType;
+    template<typename OtherDerived>
+    const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
+
+    enum {
+      HNormalized_Size = Direction==Vertical ? ei_traits<ExpressionType>::RowsAtCompileTime
+                                             : ei_traits<ExpressionType>::ColsAtCompileTime,
+      HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
+    };
+    typedef Block<ExpressionType,
+                  Direction==Vertical   ? int(HNormalized_SizeMinusOne)
+                                        : int(ei_traits<ExpressionType>::RowsAtCompileTime),
+                  Direction==Horizontal ? int(HNormalized_SizeMinusOne)
+                                        : int(ei_traits<ExpressionType>::ColsAtCompileTime)>
+            HNormalized_Block;
+    typedef Block<ExpressionType,
+                  Direction==Vertical   ? 1 : int(ei_traits<ExpressionType>::RowsAtCompileTime),
+                  Direction==Horizontal ? 1 : int(ei_traits<ExpressionType>::ColsAtCompileTime)>
+            HNormalized_Factors;
+    typedef CwiseBinaryOp<ei_scalar_quotient_op<typename ei_traits<ExpressionType>::Scalar>,
+                NestByValue<HNormalized_Block>,
+                NestByValue<Replicate<NestByValue<HNormalized_Factors>,
+                  Direction==Vertical   ? HNormalized_SizeMinusOne : 1,
+                  Direction==Horizontal ? HNormalized_SizeMinusOne : 1> > >
+            HNormalizedReturnType;
+
+    const HNormalizedReturnType hnormalized() const;
+
  protected:
    ExpressionTypeNested m_matrix;
 };
--- a/Eigen/src/Array/Random.h
+++ b/Eigen/src/Array/Random.h
@@ -110,7 +110,7 @@ MatrixBase<Derived>::Random()
  * Example: \include MatrixBase_setRandom.cpp
  * Output: \verbinclude MatrixBase_setRandom.out
  *
-  * \sa class CwiseNullaryOp, MatrixBase::setRandom(int,int)
+  * \sa class CwiseNullaryOp, setRandom(int), setRandom(int,int)
  */
 template<typename Derived>
 inline Derived& MatrixBase<Derived>::setRandom()
@@ -118,4 +118,39 @@ inline Derived& MatrixBase<Derived>::setRandom()
  return *this = Random(rows(), cols());
 }

+/** Resizes to the given \a size, and sets all coefficients in this expression to random values.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include Matrix_setRandom_int.cpp
+  * Output: \verbinclude Matrix_setRandom_int.out
+  *
+  * \sa MatrixBase::setRandom(), setRandom(int,int), class CwiseNullaryOp, MatrixBase::Random()
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setRandom(int size)
+{
+  resize(size);
+  return setRandom();
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to random values.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setRandom_int_int.cpp
+  * Output: \verbinclude Matrix_setRandom_int_int.out
+  *
+  * \sa MatrixBase::setRandom(), setRandom(int), class CwiseNullaryOp, MatrixBase::Random()
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setRandom(int rows, int cols)
+{
+  resize(rows, cols);
+  return setRandom();
+}
+
 #endif // EIGEN_RANDOM_H
--- a/Eigen/src/Array/Replicate.h
+++ b/Eigen/src/Array/Replicate.h
@@ -0,0 +1,160 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_REPLICATE_H
+#define EIGEN_REPLICATE_H
+
+/** \nonstableyet
+  * \class Replicate
+  *
+  * \brief Expression of the multiple replication of a matrix or vector
+  *
+  * \param MatrixType the type of the object we are replicating
+  *
+  * This class represents an expression of the multiple replication of a matrix or vector.
+  * It is the return type of MatrixBase::replicate() and most of the time
+  * this is the only way it is used.
+  *
+  * \sa MatrixBase::replicate()
+  */
+template<typename MatrixType,int RowFactor,int ColFactor>
+struct ei_traits<Replicate<MatrixType,RowFactor,ColFactor> >
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ?
+                  int(MatrixType::RowsAtCompileTime) + 1 : Dynamic,
+    ColsPlusOne = (MatrixType::ColsAtCompileTime != Dynamic) ?
+                  int(MatrixType::ColsAtCompileTime) + 1 : Dynamic,
+    RowsAtCompileTime = RowFactor==Dynamic || MatrixType::RowsAtCompileTime==Dynamic
+                      ? Dynamic
+                      : RowFactor * MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = ColFactor==Dynamic || MatrixType::ColsAtCompileTime==Dynamic
+                      ? Dynamic
+                      : ColFactor * MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    Flags = _MatrixTypeNested::Flags & HereditaryBits,
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+  };
+};
+
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
+  : public MatrixBase<Replicate<MatrixType,RowFactor,ColFactor> >
+{
+  public:
+
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Replicate)
+
+    inline Replicate(const MatrixType& matrix)
+      : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
+    {
+      ei_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
+    }
+
+    inline Replicate(const MatrixType& matrix, int rowFactor, int colFactor)
+      : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
+    {}
+
+    inline int rows() const { return m_matrix.rows() * m_rowFactor.value(); }
+    inline int cols() const { return m_matrix.cols() * m_colFactor.value(); }
+
+    inline Scalar coeff(int row, int col) const
+    {
+      return m_matrix.coeff(row%m_matrix.rows(), col%m_matrix.cols());
+    }
+
+  protected:
+    const typename MatrixType::Nested m_matrix;
+    const ei_int_if_dynamic<RowFactor> m_rowFactor;
+    const ei_int_if_dynamic<ColFactor> m_colFactor;
+};
+
+/** \nonstableyet
+  * \return an expression of the replication of \c *this
+  *
+  * Example: \include MatrixBase_replicate.cpp
+  * Output: \verbinclude MatrixBase_replicate.out
+  *
+  * \sa PartialRedux::replicate(), MatrixBase::replicate(int,int), class Replicate
+  */
+template<typename Derived>
+template<int RowFactor, int ColFactor>
+inline const Replicate<Derived,RowFactor,ColFactor>
+MatrixBase<Derived>::replicate() const
+{
+  return derived();
+}
+
+/** \nonstableyet
+  * \return an expression of the replication of \c *this
+  *
+  * Example: \include MatrixBase_replicate_int_int.cpp
+  * Output: \verbinclude MatrixBase_replicate_int_int.out
+  *
+  * \sa PartialRedux::replicate(), MatrixBase::replicate<int,int>(), class Replicate
+  */
+template<typename Derived>
+inline const Replicate<Derived,Dynamic,Dynamic>
+MatrixBase<Derived>::replicate(int rowFactor,int colFactor) const
+{
+  return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
+}
+
+/** \nonstableyet
+  * \return an expression of the replication of each column (or row) of \c *this
+  *
+  * Example: \include DirectionWise_replicate_int.cpp
+  * Output: \verbinclude DirectionWise_replicate_int.out
+  *
+  * \sa PartialRedux::replicate(), MatrixBase::replicate(), class Replicate
+  */
+template<typename ExpressionType, int Direction>
+const Replicate<ExpressionType,(Direction==Vertical?Dynamic:1),(Direction==Horizontal?Dynamic:1)>
+PartialRedux<ExpressionType,Direction>::replicate(int factor) const
+{
+  return Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
+          (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
+}
+
+/** \nonstableyet
+  * \return an expression of the replication of each column (or row) of \c *this
+  *
+  * Example: \include DirectionWise_replicate.cpp
+  * Output: \verbinclude DirectionWise_replicate.out
+  *
+  * \sa PartialRedux::replicate(int), MatrixBase::replicate(), class Replicate
+  */
+template<typename ExpressionType, int Direction>
+template<int Factor>
+const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)>
+PartialRedux<ExpressionType,Direction>::replicate(int factor) const
+{
+  return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
+          (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
+}
+
+#endif // EIGEN_REPLICATE_H
--- a/Eigen/src/Array/Reverse.h
+++ b/Eigen/src/Array/Reverse.h
@@ -0,0 +1,202 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Ricard Marxer <email@ricardmarxer.com>
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_REVERSE_H
+#define EIGEN_REVERSE_H
+
+/** \array_module \ingroup Array_Module
+  *
+  * \class Reverse
+  *
+  * \brief Expression of the reverse of a vector or matrix
+  *
+  * \param MatrixType the type of the object of which we are taking the reverse
+  *
+  * This class represents an expression of the reverse of a vector or matrix.
+  * It is the return type of MatrixBase::reverse() and PartialRedux::reverse()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::reverse(), PartialRedux::reverse()
+  */
+template<typename MatrixType, int Direction>
+struct ei_traits<Reverse<MatrixType, Direction> >
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+
+    // let's enable LinearAccess only with vectorization because of the product overhead
+    LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
+                 ? LinearAccessBit : 0,
+
+    Flags = (int(_MatrixTypeNested::Flags) & (HereditaryBits | PacketAccessBit | LinearAccess))
+          | (int(_MatrixTypeNested::Flags)&UpperTriangularBit ? LowerTriangularBit : 0)
+          | (int(_MatrixTypeNested::Flags)&LowerTriangularBit ? UpperTriangularBit : 0),
+
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+  };
+};
+
+template<typename PacketScalar, bool ReversePacket> struct ei_reverse_packet_cond
+{
+  static inline PacketScalar run(const PacketScalar& x) { return ei_preverse(x); }
+};
+template<typename PacketScalar> struct ei_reverse_packet_cond<PacketScalar,false>
+{
+  static inline PacketScalar run(const PacketScalar& x) { return x; }
+};
+
+template<typename MatrixType, int Direction> class Reverse
+  : public MatrixBase<Reverse<MatrixType, Direction> >
+{
+  public:
+
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Reverse)
+
+  protected:
+    enum {
+      PacketSize = ei_packet_traits<Scalar>::size,
+      IsRowMajor = Flags & RowMajorBit,
+      IsColMajor = !IsRowMajor,
+      ReverseRow = (Direction == Vertical)   || (Direction == BothDirections),
+      ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
+      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,
+      OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1,
+      ReversePacket = (Direction == BothDirections)
+                    || ((Direction == Vertical)   && IsColMajor)
+                    || ((Direction == Horizontal) && IsRowMajor)
+    };
+    typedef ei_reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
+  public:
+
+    inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
+
+    inline int rows() const { return m_matrix.rows(); }
+    inline int cols() const { return m_matrix.cols(); }
+
+    inline Scalar& coeffRef(int row, int col)
+    {
+      return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
+                                                    ReverseCol ? m_matrix.cols() - col - 1 : col);
+    }
+
+    inline const Scalar coeff(int row, int col) const
+    {
+      return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
+                            ReverseCol ? m_matrix.cols() - col - 1 : col);
+    }
+
+    inline const Scalar coeff(int index) const
+    {
+      return m_matrix.coeff(m_matrix.size() - index - 1);
+    }
+
+    inline Scalar& coeffRef(int index)
+    {
+      return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(int row, int col) const
+    {
+      return reverse_packet::run(m_matrix.template packet<LoadMode>(
+                                    ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
+                                    ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
+    }
+
+    template<int LoadMode>
+    inline void writePacket(int row, int col, const PacketScalar& x)
+    {
+      m_matrix.const_cast_derived().template writePacket<LoadMode>(
+                                      ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
+                                      ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
+                                      reverse_packet::run(x));
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(int index) const
+    {
+      return ei_preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
+    }
+
+    template<int LoadMode>
+    inline void writePacket(int index, const PacketScalar& x)
+    {
+      m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, ei_preverse(x));
+    }
+
+  protected:
+    const typename MatrixType::Nested m_matrix;
+};
+
+/** \returns an expression of the reverse of *this.
+  *
+  * Example: \include MatrixBase_reverse.cpp
+  * Output: \verbinclude MatrixBase_reverse.out
+  *
+  */
+template<typename Derived>
+inline Reverse<Derived, BothDirections>
+MatrixBase<Derived>::reverse()
+{
+  return derived();
+}
+
+/** This is the const version of reverse(). */
+template<typename Derived>
+inline const Reverse<Derived, BothDirections>
+MatrixBase<Derived>::reverse() const
+{
+  return derived();
+}
+
+/** This is the "in place" version of reverse: it reverses \c *this.
+  *
+  * In most cases it is probably better to simply use the reversed expression
+  * of a matrix. However, when reversing the matrix data itself is really needed,
+  * then this "in-place" version is probably the right choice because it provides
+  * the following additional features:
+  *  - less error prone: doing the same operation with .reverse() requires special care:
+  *    \code m = m.reverse().eval(); \endcode
+  *  - no temporary object is created (currently there is one created but could be avoided using swap)
+  *  - it allows future optimizations (cache friendliness, etc.)
+  *
+  * \sa reverse() */
+template<typename Derived>
+inline void MatrixBase<Derived>::reverseInPlace()
+{
+  derived() = derived().reverse().eval();
+}
+
+
+#endif // EIGEN_REVERSE_H
--- a/Eigen/src/Array/Select.h
+++ b/Eigen/src/Array/Select.h
@@ -25,7 +25,7 @@
 #ifndef EIGEN_SELECT_H
 #define EIGEN_SELECT_H

-/** \array_module \ingroup Array
+/** \array_module \ingroup Array_Module
  *
  * \class Select
  *
@@ -45,15 +45,18 @@ template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMat
 struct ei_traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
 {
   typedef typename ei_traits<ThenMatrixType>::Scalar Scalar;
+  typedef typename ConditionMatrixType::Nested ConditionMatrixNested;
+  typedef typename ThenMatrixType::Nested ThenMatrixNested;
+  typedef typename ElseMatrixType::Nested ElseMatrixNested;
   enum {
     RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime,
     ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
     Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
-    CoeffReadCost = ei_traits<ConditionMatrixType>::CoeffReadCost
-                  + EIGEN_ENUM_MAX(ei_traits<ThenMatrixType>::CoeffReadCost,
-                                   ei_traits<ElseMatrixType>::CoeffReadCost)
+    CoeffReadCost = ei_traits<typename ei_cleantype<ConditionMatrixNested>::type>::CoeffReadCost
+                  + EIGEN_ENUM_MAX(ei_traits<typename ei_cleantype<ThenMatrixNested>::type>::CoeffReadCost,
+                                   ei_traits<typename ei_cleantype<ElseMatrixNested>::type>::CoeffReadCost)
   };
 };

@@ -105,6 +108,9 @@ class Select : ei_no_assignment_operator,
  * \returns a matrix where each coefficient (i,j) is equal to \a thenMatrix(i,j)
  * if \c *this(i,j), and \a elseMatrix(i,j) otherwise.
  *
+  * Example: \include MatrixBase_select.cpp
+  * Output: \verbinclude MatrixBase_select.out
+  *
  * \sa class Select
  */
 template<typename Derived>
--- a/Eigen/src/CMakeLists.txt
+++ b/Eigen/src/CMakeLists.txt
@@ -5,5 +5,5 @@ ADD_SUBDIRECTORY(SVD)
 ADD_SUBDIRECTORY(Cholesky)
 ADD_SUBDIRECTORY(Array)
 ADD_SUBDIRECTORY(Geometry)
-ADD_SUBDIRECTORY(Regression)
+ADD_SUBDIRECTORY(LeastSquares)
 ADD_SUBDIRECTORY(Sparse)
--- a/Eigen/src/Cholesky/CMakeLists.txt
+++ b/Eigen/src/Cholesky/CMakeLists.txt
@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Cholesky_SRCS "*.h")

 INSTALL(FILES
  ${Eigen_Cholesky_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel
  )
--- a/Eigen/src/Cholesky/CholeskyWithoutSquareRoot.h
+++ b/Eigen/src/Cholesky/CholeskyWithoutSquareRoot.h
@@ -1,174 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
-//
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
-
-#ifndef EIGEN_CHOLESKY_WITHOUT_SQUARE_ROOT_H
-#define EIGEN_CHOLESKY_WITHOUT_SQUARE_ROOT_H
-
-/** \ingroup Cholesky_Module
-  *
-  * \class CholeskyWithoutSquareRoot
-  *
-  * \brief Robust Cholesky decomposition of a matrix and associated features
-  *
-  * \param MatrixType the type of the matrix of which we are computing the Cholesky decomposition
-  *
-  * This class performs a Cholesky decomposition without square root of a symmetric, positive definite
-  * matrix A such that A = L D L^* = U^* D U, where L is lower triangular with a unit diagonal
-  * and D is a diagonal matrix.
-  *
-  * Compared to a standard Cholesky decomposition, avoiding the square roots allows for faster and more
-  * stable computation.
-  *
-  * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
-  * the strict lower part does not have to store correct values.
-  *
-  * \sa MatrixBase::choleskyNoSqrt(), class Cholesky
-  */
-template<typename MatrixType> class CholeskyWithoutSquareRoot
-{
-  public:
-
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
-
-    CholeskyWithoutSquareRoot(const MatrixType& matrix)
-      : m_matrix(matrix.rows(), matrix.cols())
-    {
-      compute(matrix);
-    }
-
-    /** \returns the lower triangular matrix L */
-    inline Part<MatrixType, UnitLower> matrixL(void) const { return m_matrix; }
-
-    /** \returns the coefficients of the diagonal matrix D */
-    inline DiagonalCoeffs<MatrixType> vectorD(void) const { return m_matrix.diagonal(); }
-
-    /** \returns true if the matrix is positive definite */
-    inline bool isPositiveDefinite(void) const { return m_isPositiveDefinite; }
-
-    template<typename Derived>
-    typename Derived::Eval solve(const MatrixBase<Derived> &b) const;
-
-    void compute(const MatrixType& matrix);
-
-  protected:
-    /** \internal
-      * Used to compute and store the cholesky decomposition A = L D L^* = U^* D U.
-      * The strict upper part is used during the decomposition, the strict lower
-      * part correspond to the coefficients of L (its diagonal is equal to 1 and
-      * is not stored), and the diagonal entries correspond to D.
-      */
-    MatrixType m_matrix;
-
-    bool m_isPositiveDefinite;
-};
-
-/** Compute / recompute the Cholesky decomposition A = L D L^* = U^* D U of \a matrix
-  */
-template<typename MatrixType>
-void CholeskyWithoutSquareRoot<MatrixType>::compute(const MatrixType& a)
-{
-  assert(a.rows()==a.cols());
-  const int size = a.rows();
-  m_matrix.resize(size, size);
-  m_isPositiveDefinite = true;
-  const RealScalar eps = ei_sqrt(precision<Scalar>());
-
-  if (size<=1)
-  {
-    m_matrix = a;
-    return;
-  }
-  
-  // Let's preallocate a temporay vector to evaluate the matrix-vector product into it.
-  // Unlike the standard Cholesky decomposition, here we cannot evaluate it to the destination
-  // matrix because it a sub-row which is not compatible suitable for efficient packet evaluation.
-  // (at least if we assume the matrix is col-major)
-  Matrix<Scalar,MatrixType::RowsAtCompileTime,1> _temporary(size);
-
-  // Note that, in this algorithm the rows of the strict upper part of m_matrix is used to store
-  // column vector, thus the strange .conjugate() and .transpose()...
-
-  m_matrix.row(0) = a.row(0).conjugate();
-  m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
-  for (int j = 1; j < size; ++j)
-  {
-    RealScalar tmp = ei_real(a.coeff(j,j) - (m_matrix.row(j).start(j) * m_matrix.col(j).start(j).conjugate()).coeff(0,0));
-    m_matrix.coeffRef(j,j) = tmp;
-
-    if (tmp < eps)
-    {
-      m_isPositiveDefinite = false;
-      return;
-    }
-
-    int endSize = size-j-1;
-    if (endSize>0)
-    {
-      _temporary.end(endSize) = ( m_matrix.block(j+1,0, endSize, j)
-                                  * m_matrix.col(j).start(j).conjugate() ).lazy();
-
-      m_matrix.row(j).end(endSize) = a.row(j).end(endSize).conjugate()
-                                   - _temporary.end(endSize).transpose();
-
-      m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / tmp;
-    }
-  }
-}
-
-/** \returns the solution of \f$ A x = b \f$ using the current decomposition of A.
-  * In other words, it returns \f$ A^{-1} b \f$ computing
-  * \f$ {L^{*}}^{-1} D^{-1} L^{-1} b \f$ from right to left.
-  * \param b the column vector \f$ b \f$, which can also be a matrix.
-  *
-  * See Cholesky::solve() for a example.
-  * 
-  * \sa MatrixBase::choleskyNoSqrt()
-  */
-template<typename MatrixType>
-template<typename Derived>
-typename Derived::Eval CholeskyWithoutSquareRoot<MatrixType>::solve(const MatrixBase<Derived> &b) const
-{
-  const int size = m_matrix.rows();
-  ei_assert(size==b.rows());
-
-  return m_matrix.adjoint().template part<UnitUpper>()
-    .solveTriangular(
-      (  m_matrix.cwise().inverse().template part<Diagonal>()
-       * matrixL().solveTriangular(b))
-     );
-}
-
-/** \cholesky_module
-  * \returns the Cholesky decomposition without square root of \c *this
-  */
-template<typename Derived>
-inline const CholeskyWithoutSquareRoot<typename MatrixBase<Derived>::EvalType>
-MatrixBase<Derived>::choleskyNoSqrt() const
-{
-  return derived();
-}
-
-#endif // EIGEN_CHOLESKY_WITHOUT_SQUARE_ROOT_H
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -0,0 +1,277 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_LDLT_H
+#define EIGEN_LDLT_H
+
+/** \ingroup cholesky_Module
+  *
+  * \class LDLT
+  *
+  * \brief Robust Cholesky decomposition of a matrix
+  *
+  * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
+  *
+  * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
+  * matrix \f$ A \f$ such that \f$ A =  P^TLDL^*P \f$, where P is a permutation matrix, L
+  * is lower triangular with a unit diagonal and D is a diagonal matrix.
+  *
+  * The decomposition uses pivoting to ensure stability, so that L will have
+  * zeros in the bottom right n - rank(A) submatrix (n being the size of A). Avoiding the square root
+  * on D also stabilizes the computation.
+  *
+  * Remember that Cholesky decompositions are not rank-revealing.  Also, do not use a Cholesky decomposition to determine
+  * whether a system of equations has a solution.
+  *
+  * \sa MatrixBase::ldlt(), class LLT
+  */
+ /* THIS PART OF THE DOX IS CURRENTLY DISABLED BECAUSE INACCURATE BECAUSE OF BUG IN THE DECOMPOSITION CODE
+  * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
+  * the strict lower part does not have to store correct values.
+  */
+template<typename MatrixType> class LDLT
+{
+  public:
+
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+    typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
+    typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
+    typedef Matrix<int, 1, MatrixType::RowsAtCompileTime> IntRowVectorType;
+
+    LDLT(const MatrixType& matrix)
+      : m_matrix(matrix.rows(), matrix.cols()),
+        m_p(matrix.rows()),
+        m_transpositions(matrix.rows())
+    {
+      compute(matrix);
+    }
+
+    /** \returns the lower triangular matrix L */
+    inline Part<MatrixType, UnitLowerTriangular> matrixL(void) const { return m_matrix; }
+
+    /** \returns a vector of integers, whose size is the number of rows of the matrix being decomposed,
+      * representing the P permutation i.e. the permutation of the rows. For its precise meaning,
+      * see the examples given in the documentation of class LU.
+      */
+    inline const IntColVectorType& permutationP() const
+    {
+      return m_p;
+    }
+
+    /** \returns the coefficients of the diagonal matrix D */
+    inline Diagonal<MatrixType,0> vectorD(void) const { return m_matrix.diagonal(); }
+
+    /** \returns true if the matrix is positive (semidefinite) */
+    inline bool isPositive(void) const { return m_sign == 1; }
+
+    /** \returns true if the matrix is negative (semidefinite) */
+    inline bool isNegative(void) const { return m_sign == -1; }
+
+    template<typename RhsDerived, typename ResDerived>
+    bool solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDerived> *result) const;
+
+    template<typename Derived>
+    bool solveInPlace(MatrixBase<Derived> &bAndX) const;
+
+    void compute(const MatrixType& matrix);
+
+  protected:
+    /** \internal
+      * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
+      * The strict upper part is used during the decomposition, the strict lower
+      * part correspond to the coefficients of L (its diagonal is equal to 1 and
+      * is not stored), and the diagonal entries correspond to D.
+      */
+    MatrixType m_matrix;
+    IntColVectorType m_p;
+    IntColVectorType m_transpositions;
+    int m_sign;
+};
+
+/** Compute / recompute the pivoted LDLT decomposition A = P^T L D L^* P of \a matrix; fills m_matrix, m_p,
+  * m_transpositions and m_sign, and stops early once a pivot falls below the rank cutoff. */
+template<typename MatrixType>
+void LDLT<MatrixType>::compute(const MatrixType& a)
+{
+  ei_assert(a.rows()==a.cols());
+  const int size = a.rows();
+
+  m_matrix = a;
+
+  if (size <= 1) {
+    m_p.setZero();
+    m_transpositions.setZero();
+    m_sign = (size == 0 || ei_real(a.coeff(0,0)) > 0) ? 1 : -1; // never read coeff(0,0) of an empty matrix
+    return;
+  }
+
+  RealScalar cutoff = 0, biggest_in_corner;
+
+  // By using a temporary, packet-aligned products are guaranteed. In the LLT
+  // case this is unnecessary because the diagonal is included and will always
+  // have optimal alignment.
+  Matrix<Scalar,MatrixType::RowsAtCompileTime,1> _temporary(size);
+
+  for (int j = 0; j < size; ++j)
+  {
+    // Find largest diagonal element
+    int index_of_biggest_in_corner;
+    biggest_in_corner = m_matrix.diagonal().end(size-j).cwise().abs()
+                       .maxCoeff(&index_of_biggest_in_corner);
+    index_of_biggest_in_corner += j;
+
+    if(j == 0)
+    {
+      // The biggest overall is the point of reference to which further diagonals
+      // are compared; if any diagonal is negligible compared
+      // to the largest overall, the algorithm bails.  This cutoff is suggested
+      // in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by
+      // Nicholas J. Higham. Also see "Accuracy and Stability of Numerical
+      // Algorithms" page 217, also by Higham.
+      cutoff = ei_abs(machine_epsilon<Scalar>() * size * biggest_in_corner);
+
+      m_sign = ei_real(m_matrix.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
+    }
+
+    // Finish early if the matrix is not full rank.
+    if(biggest_in_corner < cutoff)
+    {
+      for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
+      break;
+    }
+
+    m_transpositions.coeffRef(j) = index_of_biggest_in_corner;
+    if(j != index_of_biggest_in_corner)
+    {
+      m_matrix.row(j).swap(m_matrix.row(index_of_biggest_in_corner));
+      m_matrix.col(j).swap(m_matrix.col(index_of_biggest_in_corner));
+    }
+
+    if (j == 0) {
+      m_matrix.row(0) = m_matrix.row(0).conjugate();
+      m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
+      continue;
+    }
+
+    RealScalar Djj = ei_real(m_matrix.coeff(j,j) - (m_matrix.row(j).start(j)
+                                                  * m_matrix.col(j).start(j).conjugate()).coeff(0,0));
+    m_matrix.coeffRef(j,j) = Djj;
+
+    // Finish early if the matrix is not full rank; rows/cols j were already swapped, so transposition j is kept.
+    if(ei_abs(Djj) < cutoff)
+    {
+      for(int i = j + 1; i < size; i++) m_transpositions.coeffRef(i) = i;
+      break;
+    }
+
+    int endSize = size - j - 1;
+    if (endSize > 0) {
+      _temporary.end(endSize) = ( m_matrix.block(j+1,0, endSize, j)
+                                * m_matrix.col(j).start(j).conjugate() ).lazy();
+
+      m_matrix.row(j).end(endSize) = m_matrix.row(j).end(endSize).conjugate()
+                                   - _temporary.end(endSize).transpose();
+
+      m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / Djj;
+    }
+  }
+
+  // Reverse applied swaps to get P matrix.
+  for(int k = 0; k < size; ++k) m_p.coeffRef(k) = k;
+  for(int k = size-1; k >= 0; --k) {
+    std::swap(m_p.coeffRef(k), m_p.coeffRef(m_transpositions.coeff(k)));
+  }
+}
+
+/** Computes the solution x of \f$ A x = b \f$ using the current decomposition of A.
+  * The result is stored in \a result
+  *
+  * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
+  *
+  * In other words, it computes \f$ x = A^{-1} b \f$ with
+  * \f$ P^T{L^{*}}^{-1} D^{-1} L^{-1} P b \f$ from right to left.
+  *
+  * \sa LDLT::solveInPlace(), MatrixBase::ldlt()
+  */
+template<typename MatrixType>
+template<typename RhsDerived, typename ResDerived>
+bool LDLT<MatrixType>
+::solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDerived> *result) const
+{
+  const int size = m_matrix.rows();
+  ei_assert(size==b.rows() && "LDLT::solve(): invalid number of rows of the right hand side matrix b");
+  *result = b;
+  return solveInPlace(*result);
+}
+
+/** This is the \em in-place version of solve().
+  *
+  * \param bAndX represents both the right-hand side matrix b and result x.
+  *
+  * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
+  *
+  * This version avoids a copy when the right hand side matrix b is not
+  * needed anymore.
+  *
+  * \sa LDLT::solve(), MatrixBase::ldlt()
+  */
+template<typename MatrixType>
+template<typename Derived>
+bool LDLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
+{
+  const int size = m_matrix.rows();
+  ei_assert(size == bAndX.rows());
+
+  // z = P b: apply the recorded row transpositions to bAndX in forward order
+  for(int i = 0; i < size; ++i) bAndX.row(m_transpositions.coeff(i)).swap(bAndX.row(i));
+
+  // y = L^-1 z: in-place solve with the unit lower triangular factor returned by matrixL()
+  matrixL().solveTriangularInPlace(bAndX);
+
+  // w = D^-1 y: scale each row of bAndX by the inverse of the matching diagonal entry of m_matrix
+  bAndX = (m_matrix.diagonal().cwise().inverse().asDiagonal() * bAndX).lazy();
+
+  // u = L^-* w: in-place solve with the adjoint of m_matrix viewed as unit upper triangular
+  m_matrix.adjoint().template part<UnitUpperTriangular>().solveTriangularInPlace(bAndX);
+
+  // x = P^T u: undo the permutation by applying the same transpositions in reverse order
+  for (int i = size-1; i >= 0; --i) bAndX.row(m_transpositions.coeff(i)).swap(bAndX.row(i));
+
+  return true;
+}
+
+/** \cholesky_module
+  * \returns the Cholesky decomposition with full pivoting without square root of \c *this
+  */
+template<typename Derived>
+inline const LDLT<typename MatrixBase<Derived>::PlainMatrixType>
+MatrixBase<Derived>::ldlt() const
+{
+  return derived();
+}
+
+#endif // EIGEN_LDLT_H
--- a/Eigen/src/Cholesky/Cholesky.h
+++ b/Eigen/src/Cholesky/Cholesky.h
@@ -22,18 +22,18 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

-#ifndef EIGEN_CHOLESKY_H
-#define EIGEN_CHOLESKY_H
+#ifndef EIGEN_LLT_H
+#define EIGEN_LLT_H

-/** \ingroup Cholesky_Module
+/** \ingroup cholesky_Module
  *
-  * \class Cholesky
+  * \class LLT
  *
-  * \brief Standard Cholesky decomposition of a matrix and associated features
+  * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
  *
-  * \param MatrixType the type of the matrix of which we are computing the Cholesky decomposition
+  * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
  *
-  * This class performs a standard Cholesky decomposition of a symmetric, positive definite
+  * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
  * matrix A such that A = LL^* = U^*U, where L is lower triangular.
  *
  * While the Cholesky decomposition is particularly useful to solve selfadjoint problems like  D^*D x = b,
@@ -41,12 +41,17 @@
  * and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other
  * situations like generalised eigen problems with hermitian matrices.
  *
+  * Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices,
+  * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
+  * has a solution.
+  *
+  * \sa MatrixBase::llt(), class LDLT
+  */
+ /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
  * Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
  * the strict lower part does not have to store correct values.
-  *
-  * \sa MatrixBase::cholesky(), class CholeskyWithoutSquareRoot
  */
-template<typename MatrixType> class Cholesky
+template<typename MatrixType> class LLT
 {
  private:
    typedef typename MatrixType::Scalar Scalar;
@@ -59,20 +64,21 @@ template<typename MatrixType> class Cholesky
    };

  public:
-  
-    Cholesky(const MatrixType& matrix)
+
+    LLT(const MatrixType& matrix)
      : m_matrix(matrix.rows(), matrix.cols())
    {
      compute(matrix);
    }

-    inline Part<MatrixType, Lower> matrixL(void) const { return m_matrix; }
+    /** \returns the lower triangular matrix L */
+    inline Part<MatrixType, LowerTriangular> matrixL(void) const { return m_matrix; }

-    /** \returns true if the matrix is positive definite */
-    inline bool isPositiveDefinite(void) const { return m_isPositiveDefinite; }
+    template<typename RhsDerived, typename ResDerived>
+    bool solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDerived> *result) const;

    template<typename Derived>
-    typename Derived::Eval solve(const MatrixBase<Derived> &b) const;
+    bool solveInPlace(MatrixBase<Derived> &bAndX) const;

    void compute(const MatrixType& matrix);

@@ -82,33 +88,34 @@ template<typename MatrixType> class Cholesky
      * The strict upper part is not used and even not initialized.
      */
    MatrixType m_matrix;
-    bool m_isPositiveDefinite;
 };

 /** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
  */
 template<typename MatrixType>
-void Cholesky<MatrixType>::compute(const MatrixType& a)
+void LLT<MatrixType>::compute(const MatrixType& a)
 {
  assert(a.rows()==a.cols());
  const int size = a.rows();
  m_matrix.resize(size, size);
-  const RealScalar eps = ei_sqrt(precision<Scalar>());
-
+  // The biggest overall is the point of reference to which further diagonals
+  // are compared; if any diagonal is negligible compared
+  // to the largest overall, the algorithm bails.  This cutoff is suggested
+  // in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by
+  // Nicholas J. Higham. Also see "Accuracy and Stability of Numerical
+  // Algorithms" page 217, also by Higham.
+  const RealScalar cutoff = machine_epsilon<Scalar>() * size * a.diagonal().cwise().abs().maxCoeff();
  RealScalar x;
  x = ei_real(a.coeff(0,0));
-  m_isPositiveDefinite = x > eps && ei_isMuchSmallerThan(ei_imag(a.coeff(0,0)), RealScalar(1));
  m_matrix.coeffRef(0,0) = ei_sqrt(x);
+  if(size==1)
+    return;
  m_matrix.col(0).end(size-1) = a.row(0).end(size-1).adjoint() / ei_real(m_matrix.coeff(0,0));
  for (int j = 1; j < size; ++j)
  {
-    Scalar tmp = ei_real(a.coeff(j,j)) - m_matrix.row(j).start(j).norm2();
-    x = ei_real(tmp);
-    if (x < eps || (!ei_isMuchSmallerThan(ei_imag(tmp), RealScalar(1))))
-    {
-      m_isPositiveDefinite = false;
-      return;
-    }
+    x = ei_real(a.coeff(j,j)) - m_matrix.row(j).start(j).squaredNorm();
+    if (ei_abs(x) < cutoff) continue;
+
    m_matrix.coeffRef(j,j) = x = ei_sqrt(x);

    int endSize = size-j-1;
@@ -125,34 +132,58 @@ void Cholesky<MatrixType>::compute(const MatrixType& a)
  }
 }

-/** \returns the solution of \f$ A x = b \f$ using the current decomposition of A.
-  * In other words, it returns \f$ A^{-1} b \f$ computing
+/** Computes the solution x of \f$ A x = b \f$ using the current decomposition of A.
+  * The result is stored in \a result
+  *
+  * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
+  *
+  * In other words, it computes \f$ x = A^{-1} b \f$ with
  * \f$ {L^{*}}^{-1} L^{-1} b \f$ from right to left.
-  * \param b the column vector \f$ b \f$, which can also be a matrix.
  *
-  * Example: \include Cholesky_solve.cpp
-  * Output: \verbinclude Cholesky_solve.out
+  * Example: \include LLT_solve.cpp
+  * Output: \verbinclude LLT_solve.out
  *
-  * \sa MatrixBase::cholesky(), CholeskyWithoutSquareRoot::solve()
+  * \sa LLT::solveInPlace(), MatrixBase::llt()
+  */
+template<typename MatrixType>
+template<typename RhsDerived, typename ResDerived>
+bool LLT<MatrixType>::solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDerived> *result) const
+{
+  const int size = m_matrix.rows();
+  ei_assert(size==b.rows() && "LLT::solve(): invalid number of rows of the right hand side matrix b");
+  return solveInPlace((*result) = b);
+}
+
+/** This is the \em in-place version of solve().
+  *
+  * \param bAndX represents both the right-hand side matrix b and result x.
+  *
+  * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
+  *
+  * This version avoids a copy when the right hand side matrix b is not
+  * needed anymore.
+  *
+  * \sa LLT::solve(), MatrixBase::llt()
  */
 template<typename MatrixType>
 template<typename Derived>
-typename Derived::Eval Cholesky<MatrixType>::solve(const MatrixBase<Derived> &b) const
+bool LLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
 {
  const int size = m_matrix.rows();
-  ei_assert(size==b.rows());
-
-  return m_matrix.adjoint().template part<Upper>().solveTriangular(matrixL().solveTriangular(b));
+  ei_assert(size==bAndX.rows());
+  matrixL().solveTriangularInPlace(bAndX);
+  m_matrix.adjoint().template part<UpperTriangular>().solveTriangularInPlace(bAndX);
+  return true;
 }

 /** \cholesky_module
-  * \returns the Cholesky decomposition of \c *this
+  * \returns the LLT decomposition of \c *this
  */
 template<typename Derived>
-inline const Cholesky<typename MatrixBase<Derived>::EvalType>
-MatrixBase<Derived>::cholesky() const
+inline const LLT<typename MatrixBase<Derived>::PlainMatrixType>
+MatrixBase<Derived>::llt() const
 {
-  return Cholesky<typename ei_eval<Derived>::type>(derived());
+  return LLT<PlainMatrixType>(derived());
 }

-#endif // EIGEN_CHOLESKY_H
+#endif // EIGEN_LLT_H
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -112,7 +112,7 @@ struct ei_assign_novec_CompleteUnrolling
        : Index / Derived1::RowsAtCompileTime
  };

-  inline static void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.copyCoeff(row, col, src);
    ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
@@ -122,13 +122,13 @@ struct ei_assign_novec_CompleteUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  inline static void run(Derived1 &, const Derived2 &) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
 };

 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct ei_assign_novec_InnerUnrolling
 {
-  inline static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
  {
    const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
    const int row = rowMajor ? row_or_col : Index;
@@ -141,7 +141,7 @@ struct ei_assign_novec_InnerUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct ei_assign_novec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  inline static void run(Derived1 &, const Derived2 &, int) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
 };

 /**************************
@@ -161,7 +161,7 @@ struct ei_assign_innervec_CompleteUnrolling
    SrcAlignment = ei_assign_traits<Derived1,Derived2>::SrcAlignment
  };

-  inline static void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.template copyPacket<Derived2, Aligned, SrcAlignment>(row, col, src);
    ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
@@ -172,13 +172,13 @@ struct ei_assign_innervec_CompleteUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  inline static void run(Derived1 &, const Derived2 &) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
 };

 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct ei_assign_innervec_InnerUnrolling
 {
-  inline static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
  {
    const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
    const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
@@ -191,7 +191,7 @@ struct ei_assign_innervec_InnerUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 {
-  inline static void run(Derived1 &, const Derived2 &, int) {}
+  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
 };

 /***************************************************************************
@@ -210,12 +210,12 @@ struct ei_assign_impl;
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
 {
-  static void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int innerSize = dst.innerSize();
    const int outerSize = dst.outerSize();
-    for(int j = 0; j < outerSize; j++)
-      for(int i = 0; i < innerSize; i++)
+    for(int j = 0; j < outerSize; ++j)
+      for(int i = 0; i < innerSize; ++i)
      {
        if(int(Derived1::Flags)&RowMajorBit)
          dst.copyCoeff(j, i, src);
@@ -228,7 +228,7 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
 {
-  inline static void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    ei_assign_novec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
@@ -238,12 +238,12 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
 {
-  static void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
    const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
    const int outerSize = dst.outerSize();
-    for(int j = 0; j < outerSize; j++)
+    for(int j = 0; j < outerSize; ++j)
      ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
        ::run(dst, src, j);
  }
@@ -256,12 +256,12 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
 {
-  static void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int innerSize = dst.innerSize();
    const int outerSize = dst.outerSize();
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
-    for(int j = 0; j < outerSize; j++)
+    for(int j = 0; j < outerSize; ++j)
      for(int i = 0; i < innerSize; i+=packetSize)
      {
        if(int(Derived1::Flags)&RowMajorBit)
@@ -275,7 +275,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
 {
-  inline static void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
@@ -285,12 +285,12 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
 {
-  static void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
    const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
    const int outerSize = dst.outerSize();
-    for(int j = 0; j < outerSize; j++)
+    for(int j = 0; j < outerSize; ++j)
      ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
        ::run(dst, src, j);
  }
@@ -303,7 +303,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
 {
-  static void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int size = dst.size();
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
@@ -311,7 +311,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
                           : ei_alignmentOffset(&dst.coeffRef(0), size);
    const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

-    for(int index = 0; index < alignedStart; index++)
+    for(int index = 0; index < alignedStart; ++index)
      dst.copyCoeff(index, src);

    for(int index = alignedStart; index < alignedEnd; index += packetSize)
@@ -319,7 +319,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
      dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
    }

-    for(int index = alignedEnd; index < size; index++)
+    for(int index = alignedEnd; index < size; ++index)
      dst.copyCoeff(index, src);
  }
 };
@@ -327,7 +327,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
 {
-  static void run(Derived1 &dst, const Derived2 &src)
+  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const int size = Derived1::SizeAtCompileTime;
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
@@ -345,7 +345,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
 template<typename Derived1, typename Derived2>
 struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
 {
-  static void run(Derived1 &dst, const Derived2 &src)
+  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
    const int packetAlignedMask = packetSize - 1;
@@ -353,14 +353,14 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
    const int outerSize = dst.outerSize();
    const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
    int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                     : ei_alignmentOffset(&dst.coeffRef(0), innerSize);
+                     : ei_alignmentOffset(&dst.coeffRef(0,0), innerSize);

-    for(int i = 0; i < outerSize; i++)
+    for(int i = 0; i < outerSize; ++i)
    {
      const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);

      // do the non-vectorizable part of the assignment
-      for (int index = 0; index<alignedStart ; index++)
+      for (int index = 0; index<alignedStart ; ++index)
      {
        if(Derived1::Flags&RowMajorBit)
          dst.copyCoeff(i, index, src);
@@ -378,7 +378,7 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
      }

      // do the non-vectorizable part of the assignment
-      for (int index = alignedEnd; index<innerSize ; index++)
+      for (int index = alignedEnd; index<innerSize ; ++index)
      {
        if(Derived1::Flags&RowMajorBit)
          dst.copyCoeff(i, index, src);
@@ -397,17 +397,19 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>

 template<typename Derived>
 template<typename OtherDerived>
-inline Derived& MatrixBase<Derived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>
  ::lazyAssign(const MatrixBase<OtherDerived>& other)
 {
-  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived);
+  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
  ei_assert(rows() == other.rows() && cols() == other.cols());
  ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
  return derived();
 }

 template<typename Derived, typename OtherDerived,
-         bool EvalBeforeAssigning = int(OtherDerived::Flags) & EvalBeforeAssigningBit,
+         bool EvalBeforeAssigning = (int(OtherDerived::Flags) & EvalBeforeAssigningBit) != 0,
         bool NeedToTranspose = Derived::IsVectorAtCompileTime
                && OtherDerived::IsVectorAtCompileTime
                && int(Derived::RowsAtCompileTime) == int(OtherDerived::ColsAtCompileTime)
@@ -417,24 +419,24 @@ struct ei_assign_selector;

 template<typename Derived, typename OtherDerived>
 struct ei_assign_selector<Derived,OtherDerived,false,false> {
-  static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
 };
 template<typename Derived, typename OtherDerived>
 struct ei_assign_selector<Derived,OtherDerived,true,false> {
-  static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
 };
 template<typename Derived, typename OtherDerived>
 struct ei_assign_selector<Derived,OtherDerived,false,true> {
-  static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
 };
 template<typename Derived, typename OtherDerived>
 struct ei_assign_selector<Derived,OtherDerived,true,true> {
-  static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
+  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
 };

 template<typename Derived>
 template<typename OtherDerived>
-inline Derived& MatrixBase<Derived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>
  ::operator=(const MatrixBase<OtherDerived>& other)
 {
  return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -64,24 +64,24 @@
 template<typename MatrixType, int BlockRows, int BlockCols, int _PacketAccess, int _DirectAccessStatus>
 struct ei_traits<Block<MatrixType, BlockRows, BlockCols, _PacketAccess, _DirectAccessStatus> >
 {
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Nested MatrixTypeNested;
+  typedef typename ei_traits<MatrixType>::Scalar Scalar;
+  typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
  enum{
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime == 1 ? 1 : BlockRows,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime == 1 ? 1 : BlockCols,
+    RowsAtCompileTime = BlockRows,
+    ColsAtCompileTime = BlockCols,
    MaxRowsAtCompileTime = RowsAtCompileTime == 1 ? 1
-      : (BlockRows==Dynamic ? MatrixType::MaxRowsAtCompileTime : BlockRows),
+      : (BlockRows==Dynamic ? int(ei_traits<MatrixType>::MaxRowsAtCompileTime) : BlockRows),
    MaxColsAtCompileTime = ColsAtCompileTime == 1 ? 1
-      : (BlockCols==Dynamic ? MatrixType::MaxColsAtCompileTime : BlockCols),
-    RowMajor = int(MatrixType::Flags)&RowMajorBit,
-    InnerSize = RowMajor ? ColsAtCompileTime : RowsAtCompileTime,
-    InnerMaxSize = RowMajor ? MaxColsAtCompileTime : MaxRowsAtCompileTime,
+      : (BlockCols==Dynamic ? int(ei_traits<MatrixType>::MaxColsAtCompileTime) : BlockCols),
+    RowMajor = int(ei_traits<MatrixType>::Flags)&RowMajorBit,
+    InnerSize = RowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+    InnerMaxSize = RowMajor ? int(MaxColsAtCompileTime) : int(MaxRowsAtCompileTime),
    MaskPacketAccessBit = (InnerMaxSize == Dynamic || (InnerSize >= ei_packet_traits<Scalar>::size))
                        ? PacketAccessBit : 0,
    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
-    Flags = (MatrixType::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit)) | FlagsLinearAccessBit,
-    CoeffReadCost = MatrixType::CoeffReadCost,
+    Flags = (ei_traits<MatrixType>::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit)) | FlagsLinearAccessBit,
+    CoeffReadCost = ei_traits<MatrixType>::CoeffReadCost,
    PacketAccess = _PacketAccess
  };
  typedef typename ei_meta_if<int(PacketAccess)==ForceAligned,
@@ -122,7 +122,7 @@ template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess, in
      : m_matrix(matrix), m_startRow(startRow), m_startCol(startCol),
        m_blockRows(matrix.rows()), m_blockCols(matrix.cols())
    {
-      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && RowsAtCompileTime!=Dynamic,this_method_is_only_for_fixed_size);
+      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
      ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= matrix.rows()
          && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= matrix.cols());
    }
@@ -146,15 +146,13 @@ template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess, in
    inline int rows() const { return m_blockRows.value(); }
    inline int cols() const { return m_blockCols.value(); }

-    inline int stride(void) const { return m_matrix.stride(); }
-
    inline Scalar& coeffRef(int row, int col)
    {
      return m_matrix.const_cast_derived()
               .coeffRef(row + m_startRow.value(), col + m_startCol.value());
    }

-    inline const Scalar coeff(int row, int col) const
+    inline const CoeffReturnType coeff(int row, int col) const
    {
      return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value());
    }
@@ -166,7 +164,7 @@ template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess, in
                       m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
    }

-    inline const Scalar coeff(int index) const
+    inline const CoeffReturnType coeff(int index) const
    {
      return m_matrix
             .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
@@ -223,15 +221,13 @@ class Block<MatrixType,BlockRows,BlockCols,PacketAccess,HasDirectAccess>

    class InnerIterator;
    typedef typename ei_traits<Block>::AlignedDerivedType AlignedDerivedType;
+    friend class Block<MatrixType,BlockRows,BlockCols,PacketAccess==AsRequested?ForceAligned:AsRequested,HasDirectAccess>;

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)

-    AlignedDerivedType forceAligned()
+    AlignedDerivedType _convertToForceAligned()
    {
-      if (PacketAccess==ForceAligned)
-        return *this;
-      else
-        return Block<MatrixType,BlockRows,BlockCols,ForceAligned,HasDirectAccess>
+      return Block<MatrixType,BlockRows,BlockCols,ForceAligned,HasDirectAccess>
                    (m_matrix, Base::m_data, Base::m_rows.value(), Base::m_cols.value());
    }

@@ -318,41 +314,41 @@ inline const typename BlockReturnType<Derived>::Type MatrixBase<Derived>
  return typename BlockReturnType<Derived>::Type(derived(), startRow, startCol, blockRows, blockCols);
 }

-/** \returns a dynamic-size expression of a block in *this.
+/** \returns a dynamic-size expression of a segment (i.e. a vector block) in *this.
  *
  * \only_for_vectors
  *
-  * \addexample BlockIntInt \label How to reference a sub-vector (dynamic size)
+  * \addexample SegmentIntInt \label How to reference a sub-vector (dynamic size)
  *
-  * \param start the first coefficient in the block
-  * \param size the number of coefficients in the block
+  * \param start the first coefficient in the segment
+  * \param size the number of coefficients in the segment
  *
-  * Example: \include MatrixBase_block_int_int.cpp
-  * Output: \verbinclude MatrixBase_block_int_int.out
+  * Example: \include MatrixBase_segment_int_int.cpp
+  * Output: \verbinclude MatrixBase_segment_int_int.out
  *
  * \note Even though the returned expression has dynamic size, in the case
  * when it is applied to a fixed-size vector, it inherits a fixed maximal size,
  * which means that evaluating it does not cause a dynamic memory allocation.
  *
-  * \sa class Block, block(int)
+  * \sa class Block, segment(int)
  */
 template<typename Derived>
 inline typename BlockReturnType<Derived>::SubVectorType MatrixBase<Derived>
-  ::block(int start, int size)
+  ::segment(int start, int size)
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return typename BlockReturnType<Derived>::SubVectorType(derived(), RowsAtCompileTime == 1 ? 0 : start,
                                   ColsAtCompileTime == 1 ? 0 : start,
                                   RowsAtCompileTime == 1 ? 1 : size,
                                   ColsAtCompileTime == 1 ? 1 : size);
 }

-/** This is the const version of block(int,int).*/
+/** This is the const version of segment(int,int).*/
 template<typename Derived>
 inline const typename BlockReturnType<Derived>::SubVectorType
-MatrixBase<Derived>::block(int start, int size) const
+MatrixBase<Derived>::segment(int start, int size) const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return typename BlockReturnType<Derived>::SubVectorType(derived(), RowsAtCompileTime == 1 ? 0 : start,
                                   ColsAtCompileTime == 1 ? 0 : start,
                                   RowsAtCompileTime == 1 ? 1 : size,
@@ -380,7 +376,7 @@ template<typename Derived>
 inline typename BlockReturnType<Derived,Dynamic>::SubVectorType
 MatrixBase<Derived>::start(int size)
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived,
               RowsAtCompileTime == 1 ? 1 : Dynamic,
               ColsAtCompileTime == 1 ? 1 : Dynamic>
@@ -394,7 +390,7 @@ template<typename Derived>
 inline const typename BlockReturnType<Derived,Dynamic>::SubVectorType
 MatrixBase<Derived>::start(int size) const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived,
               RowsAtCompileTime == 1 ? 1 : Dynamic,
               ColsAtCompileTime == 1 ? 1 : Dynamic>
@@ -424,7 +420,7 @@ template<typename Derived>
 inline typename BlockReturnType<Derived,Dynamic>::SubVectorType
 MatrixBase<Derived>::end(int size)
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived,
               RowsAtCompileTime == 1 ? 1 : Dynamic,
               ColsAtCompileTime == 1 ? 1 : Dynamic>
@@ -440,7 +436,7 @@ template<typename Derived>
 inline const typename BlockReturnType<Derived,Dynamic>::SubVectorType
 MatrixBase<Derived>::end(int size) const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived,
               RowsAtCompileTime == 1 ? 1 : Dynamic,
               ColsAtCompileTime == 1 ? 1 : Dynamic>
@@ -451,38 +447,38 @@ MatrixBase<Derived>::end(int size) const
               ColsAtCompileTime == 1 ? 1 : size);
 }

-/** \returns a fixed-size expression of a sub-vector of \c *this
+/** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
  *
  * \only_for_vectors
  *
  * The template parameter \a Size is the number of coefficients in the block
-  * 
+  *
  * \param start the index of the first element of the sub-vector
  *
-  * Example: \include MatrixBase_template_int.cpp
-  * Output: \verbinclude MatrixBase_template_int.out
+  * Example: \include MatrixBase_template_int_segment.cpp
+  * Output: \verbinclude MatrixBase_template_int_segment.out
  *
  * \sa class Block
  */
 template<typename Derived>
 template<int Size>
 inline typename BlockReturnType<Derived,Size>::SubVectorType
-MatrixBase<Derived>::block(int start)
+MatrixBase<Derived>::segment(int start)
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived,  (RowsAtCompileTime == 1 ? 1 : Size),
                         (ColsAtCompileTime == 1 ? 1 : Size)>
              (derived(), RowsAtCompileTime == 1 ? 0 : start,
                          ColsAtCompileTime == 1 ? 0 : start);
 }

-/** This is the const version of block<int>(int).*/
+/** This is the const version of segment<int>(int).*/
 template<typename Derived>
 template<int Size>
 inline const typename BlockReturnType<Derived,Size>::SubVectorType
-MatrixBase<Derived>::block(int start) const
+MatrixBase<Derived>::segment(int start) const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived,  (RowsAtCompileTime == 1 ? 1 : Size),
                         (ColsAtCompileTime == 1 ? 1 : Size)>
              (derived(), RowsAtCompileTime == 1 ? 0 : start,
@@ -507,7 +503,7 @@ template<int Size>
 inline typename BlockReturnType<Derived,Size>::SubVectorType
 MatrixBase<Derived>::start()
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived, (RowsAtCompileTime == 1 ? 1 : Size),
                        (ColsAtCompileTime == 1 ? 1 : Size)>(derived(), 0, 0);
 }
@@ -518,7 +514,7 @@ template<int Size>
 inline const typename BlockReturnType<Derived,Size>::SubVectorType
 MatrixBase<Derived>::start() const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived, (RowsAtCompileTime == 1 ? 1 : Size),
                        (ColsAtCompileTime == 1 ? 1 : Size)>(derived(), 0, 0);
 }
@@ -539,7 +535,7 @@ template<int Size>
 inline typename BlockReturnType<Derived,Size>::SubVectorType
 MatrixBase<Derived>::end()
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived, RowsAtCompileTime == 1 ? 1 : Size,
                        ColsAtCompileTime == 1 ? 1 : Size>
           (derived(),
@@ -553,7 +549,7 @@ template<int Size>
 inline const typename BlockReturnType<Derived,Size>::SubVectorType
 MatrixBase<Derived>::end() const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return Block<Derived, RowsAtCompileTime == 1 ? 1 : Size,
                        ColsAtCompileTime == 1 ? 1 : Size>
           (derived(),
--- a/Eigen/src/Core/CMakeLists.txt
+++ b/Eigen/src/Core/CMakeLists.txt
@@ -2,8 +2,16 @@ FILE(GLOB Eigen_Core_SRCS "*.h")

 INSTALL(FILES
  ${Eigen_Core_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel
  )

+FILE(GLOB Eigen_Core_Product_SRCS "products/*.h")
+
+INSTALL(FILES
+  ${Eigen_Core_Product_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/products COMPONENT Devel
+  )
+
+
 ADD_SUBDIRECTORY(util)
 ADD_SUBDIRECTORY(arch)
--- a/Eigen/src/Core/Coeffs.h
+++ b/Eigen/src/Core/Coeffs.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -40,7 +40,7 @@
  * \sa operator()(int,int) const, coeffRef(int,int), coeff(int) const
  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::coeff(int row, int col) const
 {
  ei_internal_assert(row >= 0 && row < rows()
@@ -53,7 +53,7 @@ inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
  * \sa operator()(int,int), operator[](int) const
  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::operator()(int row, int col) const
 {
  ei_assert(row >= 0 && row < rows()
@@ -76,7 +76,7 @@ inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
  * \sa operator()(int,int), coeff(int, int) const, coeffRef(int)
  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::coeffRef(int row, int col)
 {
  ei_internal_assert(row >= 0 && row < rows()
@@ -89,7 +89,7 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  * \sa operator()(int,int) const, operator[](int)
  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::operator()(int row, int col)
 {
  ei_assert(row >= 0 && row < rows()
@@ -112,7 +112,7 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  * \sa operator[](int) const, coeffRef(int), coeff(int,int) const
  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::coeff(int index) const
 {
  ei_internal_assert(index >= 0 && index < size());
@@ -127,7 +127,7 @@ inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
  * z() const, w() const
  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::operator[](int index) const
 {
  ei_assert(index >= 0 && index < size());
@@ -144,7 +144,7 @@ inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
  * z() const, w() const
  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::operator()(int index) const
 {
  ei_assert(index >= 0 && index < size());
@@ -166,7 +166,7 @@ inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
  * \sa operator[](int), coeff(int) const, coeffRef(int,int)
  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::coeffRef(int index)
 {
  ei_internal_assert(index >= 0 && index < size());
@@ -180,7 +180,7 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  * \sa operator[](int) const, operator()(int,int), x(), y(), z(), w()
  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::operator[](int index)
 {
  ei_assert(index >= 0 && index < size());
@@ -196,7 +196,7 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  * \sa operator[](int) const, operator()(int,int), x(), y(), z(), w()
  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::operator()(int index)
 {
  ei_assert(index >= 0 && index < size());
@@ -205,42 +205,42 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>

 /** equivalent to operator[](0).  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::x() const { return (*this)[0]; }

 /** equivalent to operator[](1).  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::y() const { return (*this)[1]; }

 /** equivalent to operator[](2).  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::z() const { return (*this)[2]; }

 /** equivalent to operator[](3).  */
 template<typename Derived>
-inline const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
  ::w() const { return (*this)[3]; }

 /** equivalent to operator[](0).  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::x() { return (*this)[0]; }

 /** equivalent to operator[](1).  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::y() { return (*this)[1]; }

 /** equivalent to operator[](2).  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::z() { return (*this)[2]; }

 /** equivalent to operator[](3).  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  ::w() { return (*this)[3]; }

 /** \returns the packet of coefficients starting at the given row and column. It is your responsibility
@@ -253,7 +253,7 @@ inline typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
  */
 template<typename Derived>
 template<int LoadMode>
-inline typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
+EIGEN_STRONG_INLINE typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
 MatrixBase<Derived>::packet(int row, int col) const
 {
  ei_internal_assert(row >= 0 && row < rows()
@@ -271,7 +271,7 @@ MatrixBase<Derived>::packet(int row, int col) const
  */
 template<typename Derived>
 template<int StoreMode>
-inline void MatrixBase<Derived>::writePacket
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::writePacket
 (int row, int col, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x)
 {
  ei_internal_assert(row >= 0 && row < rows()
@@ -289,7 +289,7 @@ inline void MatrixBase<Derived>::writePacket
  */
 template<typename Derived>
 template<int LoadMode>
-inline typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
+EIGEN_STRONG_INLINE typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type
 MatrixBase<Derived>::packet(int index) const
 {
  ei_internal_assert(index >= 0 && index < size());
@@ -306,33 +306,56 @@ MatrixBase<Derived>::packet(int index) const
  */
 template<typename Derived>
 template<int StoreMode>
-inline void MatrixBase<Derived>::writePacket
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::writePacket
 (int index, const typename ei_packet_traits<typename ei_traits<Derived>::Scalar>::type& x)
 {
  ei_internal_assert(index >= 0 && index < size());
  derived().template writePacket<StoreMode>(index,x);
 }

+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+/** \internal Copies the coefficient at position (row,col) of other into *this.
+  *
+  * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+  * with usual assignments.
+  *
+  * Outside of this internal usage, this method is probably of no use. It is hidden from the public API documentation.
+  */
 template<typename Derived>
 template<typename OtherDerived>
-inline void MatrixBase<Derived>::copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other)
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other)
 {
  ei_internal_assert(row >= 0 && row < rows()
                     && col >= 0 && col < cols());
  derived().coeffRef(row, col) = other.derived().coeff(row, col);
 }

+/** \internal Copies the coefficient at the given index of other into *this.
+  *
+  * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+  * with usual assignments.
+  *
+  * Outside of this internal usage, this method is probably of no use. It is hidden from the public API documentation.
+  */
 template<typename Derived>
 template<typename OtherDerived>
-inline void MatrixBase<Derived>::copyCoeff(int index, const MatrixBase<OtherDerived>& other)
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::copyCoeff(int index, const MatrixBase<OtherDerived>& other)
 {
  ei_internal_assert(index >= 0 && index < size());
  derived().coeffRef(index) = other.derived().coeff(index);
 }

+/** \internal Copies the packet at position (row,col) of other into *this.
+  *
+  * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+  * with usual assignments.
+  *
+  * Outside of this internal usage, this method is probably of no use. It is hidden from the public API documentation.
+  */
 template<typename Derived>
 template<typename OtherDerived, int StoreMode, int LoadMode>
-inline void MatrixBase<Derived>::copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
 {
  ei_internal_assert(row >= 0 && row < rows()
                     && col >= 0 && col < cols());
@@ -340,13 +363,22 @@ inline void MatrixBase<Derived>::copyPacket(int row, int col, const MatrixBase<O
    other.derived().template packet<LoadMode>(row, col));
 }

+/** \internal Copies the packet at the given index of other into *this.
+  *
+  * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
+  * with usual assignments.
+  *
+  * Outside of this internal usage, this method is probably of no use. It is hidden from the public API documentation.
+  */
 template<typename Derived>
 template<typename OtherDerived, int StoreMode, int LoadMode>
-inline void MatrixBase<Derived>::copyPacket(int index, const MatrixBase<OtherDerived>& other)
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::copyPacket(int index, const MatrixBase<OtherDerived>& other)
 {
  ei_internal_assert(index >= 0 && index < size());
  derived().template writePacket<StoreMode>(index,
    other.derived().template packet<LoadMode>(index));
 }

+#endif
+
 #endif // EIGEN_COEFFS_H
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -27,13 +27,13 @@
 #define EIGEN_COMMAINITIALIZER_H

 /** \class CommaInitializer
-  * 
+  *
  * \brief Helper class used by the comma initializer operator
  *
  * This class is internally used to implement the comma initializer feature. It is
  * the return type of MatrixBase::operator<<, and most of the time this is the only
  * way it is used.
-  * 
+  *
  * \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
  */
 template<typename MatrixType>
@@ -128,7 +128,7 @@ struct CommaInitializer
  *
  * Example: \include MatrixBase_set.cpp
  * Output: \verbinclude MatrixBase_set.out
-  * 
+  *
  * \sa CommaInitializer::finished(), class CommaInitializer
  */
 template<typename Derived>
--- a/Eigen/src/Core/Cwise.h
+++ b/Eigen/src/Core/Cwise.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -31,6 +31,18 @@
 #define EIGEN_CWISE_BINOP_RETURN_TYPE(OP) \
    CwiseBinaryOp<OP<typename ei_traits<ExpressionType>::Scalar>, ExpressionType, OtherDerived>

+#define EIGEN_CWISE_PRODUCT_RETURN_TYPE \
+    CwiseBinaryOp< \
+      ei_scalar_product_op< \
+        typename ei_scalar_product_traits< \
+          typename ei_traits<ExpressionType>::Scalar, \
+          typename ei_traits<OtherDerived>::Scalar \
+        >::ReturnType \
+      >, \
+      ExpressionType, \
+      OtherDerived \
+    >
+
 /** \internal
  * convenient macro to defined the return type of a cwise unary operation */
 #define EIGEN_CWISE_UNOP_RETURN_TYPE(OP) \
@@ -52,7 +64,7 @@
  * It is the return type of MatrixBase::cwise()
  * and most of the time this is the only way it is used.
  *
-  * Note that some methods are defined in the \ref Array module.
+  * Note that some methods are defined in the \ref Array_Module array module.
  *
  * Example: \include MatrixBase_cwise_const.cpp
  * Output: \verbinclude MatrixBase_cwise_const.out
@@ -74,7 +86,7 @@ template<typename ExpressionType> class Cwise
    inline const ExpressionType& _expression() const { return m_matrix; }

    template<typename OtherDerived>
-    const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_product_op)
+    const EIGEN_CWISE_PRODUCT_RETURN_TYPE
    operator*(const MatrixBase<OtherDerived> &other) const;

    template<typename OtherDerived>
@@ -116,6 +128,12 @@ template<typename ExpressionType> class Cwise

    ExpressionType& operator-=(const Scalar& scalar);

+    template<typename OtherDerived>
+    inline ExpressionType& operator*=(const MatrixBase<OtherDerived> &other);
+
+    template<typename OtherDerived>
+    inline ExpressionType& operator/=(const MatrixBase<OtherDerived> &other);
+
    template<typename OtherDerived> const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less)
    operator<(const MatrixBase<OtherDerived>& other) const;

@@ -153,6 +171,11 @@ template<typename ExpressionType> class Cwise
    const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::not_equal_to)
    operator!=(Scalar s) const;

+    // allows Cwise to be extended from outside Eigen (define EIGEN_CWISE_PLUGIN to a header to be included here)
+    #ifdef EIGEN_CWISE_PLUGIN
+    #include EIGEN_CWISE_PLUGIN
+    #endif
+
  protected:
    ExpressionTypeNested m_matrix;
 };
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -46,6 +46,8 @@
 template<typename BinaryOp, typename Lhs, typename Rhs>
 struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
 {
+  // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor),
+  // we still want to handle the case when the result type is different.
  typedef typename ei_result_of<
                     BinaryOp(
                       typename Lhs::Scalar,
@@ -84,35 +86,46 @@ class CwiseBinaryOp : ei_no_assignment_operator,
    typedef typename ei_traits<CwiseBinaryOp>::LhsNested LhsNested;
    typedef typename ei_traits<CwiseBinaryOp>::RhsNested RhsNested;

-    class InnerIterator;
-
-    inline CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
+    EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
      : m_lhs(lhs), m_rhs(rhs), m_functor(func)
    {
+      // we require Lhs and Rhs to have the same scalar type, except for functors flagged by ei_functor_allows_mixing_real_and_complex,
+      // for which only the RealScalar types have to match. If functors taking operands of arbitrary different types were needed, this check should be
+      // moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
+      // currently they take only one typename Scalar template parameter.
+      // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
+      // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
+      // add together a float matrix and a double matrix.
+      EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
+                           ? int(ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret)
+                           : int(ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret)),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+      // require the sizes to match
+      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
      ei_assert(lhs.rows() == rhs.rows() && lhs.cols() == rhs.cols());
    }

-    inline int rows() const { return m_lhs.rows(); }
-    inline int cols() const { return m_lhs.cols(); }
+    EIGEN_STRONG_INLINE int rows() const { return m_lhs.rows(); }
+    EIGEN_STRONG_INLINE int cols() const { return m_lhs.cols(); }

-    inline const Scalar coeff(int row, int col) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int row, int col) const
    {
      return m_functor(m_lhs.coeff(row, col), m_rhs.coeff(row, col));
    }

    template<int LoadMode>
-    inline PacketScalar packet(int row, int col) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int row, int col) const
    {
      return m_functor.packetOp(m_lhs.template packet<LoadMode>(row, col), m_rhs.template packet<LoadMode>(row, col));
    }

-    inline const Scalar coeff(int index) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int index) const
    {
      return m_functor(m_lhs.coeff(index), m_rhs.coeff(index));
    }

    template<int LoadMode>
-    inline PacketScalar packet(int index) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int index) const
    {
      return m_functor.packetOp(m_lhs.template packet<LoadMode>(index), m_rhs.template packet<LoadMode>(index));
    }
@@ -131,7 +144,7 @@ class CwiseBinaryOp : ei_no_assignment_operator,
  */
 template<typename Derived>
 template<typename OtherDerived>
-inline const CwiseBinaryOp<ei_scalar_difference_op<typename ei_traits<Derived>::Scalar>,
+EIGEN_STRONG_INLINE const CwiseBinaryOp<ei_scalar_difference_op<typename ei_traits<Derived>::Scalar>,
                                 Derived, OtherDerived>
 MatrixBase<Derived>::operator-(const MatrixBase<OtherDerived> &other) const
 {
@@ -145,7 +158,7 @@ MatrixBase<Derived>::operator-(const MatrixBase<OtherDerived> &other) const
  */
 template<typename Derived>
 template<typename OtherDerived>
-inline Derived &
+EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
 {
  return *this = *this - other;
@@ -161,7 +174,7 @@ MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
  */
 template<typename Derived>
 template<typename OtherDerived>
-inline const CwiseBinaryOp<ei_scalar_sum_op<typename ei_traits<Derived>::Scalar>, Derived, OtherDerived>
+EIGEN_STRONG_INLINE const CwiseBinaryOp<ei_scalar_sum_op<typename ei_traits<Derived>::Scalar>, Derived, OtherDerived>
 MatrixBase<Derived>::operator+(const MatrixBase<OtherDerived> &other) const
 {
  return CwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived, OtherDerived>(derived(), other.derived());
@@ -173,7 +186,7 @@ MatrixBase<Derived>::operator+(const MatrixBase<OtherDerived> &other) const
  */
 template<typename Derived>
 template<typename OtherDerived>
-inline Derived &
+EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
 {
  return *this = *this + other;
@@ -188,10 +201,10 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
  */
 template<typename ExpressionType>
 template<typename OtherDerived>
-inline const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_product_op)
+EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE
 Cwise<ExpressionType>::operator*(const MatrixBase<OtherDerived> &other) const
 {
-  return EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_product_op)(_expression(), other.derived());
+  return EIGEN_CWISE_PRODUCT_RETURN_TYPE(_expression(), other.derived());
 }

 /** \returns an expression of the coefficient-wise quotient of *this and \a other
@@ -203,12 +216,40 @@ Cwise<ExpressionType>::operator*(const MatrixBase<OtherDerived> &other) const
  */
 template<typename ExpressionType>
 template<typename OtherDerived>
-inline const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_quotient_op)
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_quotient_op)
 Cwise<ExpressionType>::operator/(const MatrixBase<OtherDerived> &other) const
 {
  return EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_quotient_op)(_expression(), other.derived());
 }

+/** Replaces this expression by its coefficient-wise product with \a other.
+  *
+  * Example: \include Cwise_times_equal.cpp
+  * Output: \verbinclude Cwise_times_equal.out
+  *
+  * \sa operator*(), operator/=()
+  */
+template<typename ExpressionType>
+template<typename OtherDerived>
+inline ExpressionType& Cwise<ExpressionType>::operator*=(const MatrixBase<OtherDerived> &other)
+{
+  return m_matrix.const_cast_derived() = *this * other;
+}
+
+/** Replaces this expression by its coefficient-wise quotient by \a other.
+  *
+  * Example: \include Cwise_slash_equal.cpp
+  * Output: \verbinclude Cwise_slash_equal.out
+  *
+  * \sa operator/(), operator*=()
+  */
+template<typename ExpressionType>
+template<typename OtherDerived>
+inline ExpressionType& Cwise<ExpressionType>::operator/=(const MatrixBase<OtherDerived> &other)
+{
+  return m_matrix.const_cast_derived() = *this / other;
+}
+
 /** \returns an expression of the coefficient-wise min of *this and \a other
  *
  * Example: \include Cwise_min.cpp
@@ -218,7 +259,7 @@ Cwise<ExpressionType>::operator/(const MatrixBase<OtherDerived> &other) const
  */
 template<typename ExpressionType>
 template<typename OtherDerived>
-inline const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_min_op)
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_min_op)
 Cwise<ExpressionType>::min(const MatrixBase<OtherDerived> &other) const
 {
  return EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_min_op)(_expression(), other.derived());
@@ -233,7 +274,7 @@ Cwise<ExpressionType>::min(const MatrixBase<OtherDerived> &other) const
  */
 template<typename ExpressionType>
 template<typename OtherDerived>
-inline const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_max_op)
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_max_op)
 Cwise<ExpressionType>::max(const MatrixBase<OtherDerived> &other) const
 {
  return EIGEN_CWISE_BINOP_RETURN_TYPE(ei_scalar_max_op)(_expression(), other.derived());
@@ -254,7 +295,7 @@ Cwise<ExpressionType>::max(const MatrixBase<OtherDerived> &other) const
  */
 template<typename Derived>
 template<typename CustomBinaryOp, typename OtherDerived>
-inline const CwiseBinaryOp<CustomBinaryOp, Derived, OtherDerived>
+EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, Derived, OtherDerived>
 MatrixBase<Derived>::binaryExpr(const MatrixBase<OtherDerived> &other, const CustomBinaryOp& func) const
 {
  return CwiseBinaryOp<CustomBinaryOp, Derived, OtherDerived>(derived(), other.derived(), func);
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -41,14 +41,9 @@
  * \sa class CwiseUnaryOp, class CwiseBinaryOp, MatrixBase::NullaryExpr()
  */
 template<typename NullaryOp, typename MatrixType>
-struct ei_traits<CwiseNullaryOp<NullaryOp, MatrixType> >
+struct ei_traits<CwiseNullaryOp<NullaryOp, MatrixType> > : ei_traits<MatrixType>
 {
-  typedef typename ei_traits<MatrixType>::Scalar Scalar;
  enum {
-    RowsAtCompileTime = ei_traits<MatrixType>::RowsAtCompileTime,
-    ColsAtCompileTime = ei_traits<MatrixType>::ColsAtCompileTime,
-    MaxRowsAtCompileTime = ei_traits<MatrixType>::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = ei_traits<MatrixType>::MaxColsAtCompileTime,
    Flags = (ei_traits<MatrixType>::Flags
      & (  HereditaryBits
         | (ei_functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
@@ -75,21 +70,21 @@ class CwiseNullaryOp : ei_no_assignment_operator,
          && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
    }

-    int rows() const { return m_rows.value(); }
-    int cols() const { return m_cols.value(); }
+    EIGEN_STRONG_INLINE int rows() const { return m_rows.value(); }
+    EIGEN_STRONG_INLINE int cols() const { return m_cols.value(); }

-    const Scalar coeff(int rows, int cols) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int rows, int cols) const
    {
      return m_functor(rows, cols);
    }

    template<int LoadMode>
-    PacketScalar packet(int, int) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int, int) const
    {
      return m_functor.packetOp();
    }

-    const Scalar coeff(int index) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int index) const
    {
      if(RowsAtCompileTime == 1)
        return m_functor(0, index);
@@ -98,7 +93,7 @@ class CwiseNullaryOp : ei_no_assignment_operator,
    }

    template<int LoadMode>
-    PacketScalar packet(int) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int) const
    {
      return m_functor.packetOp();
    }
@@ -125,7 +120,7 @@ class CwiseNullaryOp : ei_no_assignment_operator,
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
 MatrixBase<Derived>::NullaryExpr(int rows, int cols, const CustomNullaryOp& func)
 {
  return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
@@ -148,9 +143,10 @@ MatrixBase<Derived>::NullaryExpr(int rows, int cols, const CustomNullaryOp& func
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
 MatrixBase<Derived>::NullaryExpr(int size, const CustomNullaryOp& func)
 {
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  ei_assert(IsVectorAtCompileTime);
  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
  else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
@@ -167,7 +163,7 @@ MatrixBase<Derived>::NullaryExpr(int size, const CustomNullaryOp& func)
  */
 template<typename Derived>
 template<typename CustomNullaryOp>
-const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
 MatrixBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
 {
  return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
@@ -187,7 +183,7 @@ MatrixBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
  * \sa class CwiseNullaryOp
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Constant(int rows, int cols, const Scalar& value)
 {
  return NullaryExpr(rows, cols, ei_scalar_constant_op<Scalar>(value));
@@ -209,7 +205,7 @@ MatrixBase<Derived>::Constant(int rows, int cols, const Scalar& value)
  * \sa class CwiseNullaryOp
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Constant(int size, const Scalar& value)
 {
  return NullaryExpr(size, ei_scalar_constant_op<Scalar>(value));
@@ -225,34 +221,91 @@ MatrixBase<Derived>::Constant(int size, const Scalar& value)
  * \sa class CwiseNullaryOp
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Constant(const Scalar& value)
 {
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
  return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_constant_op<Scalar>(value));
 }

+/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
 template<typename Derived>
 bool MatrixBase<Derived>::isApproxToConstant
 (const Scalar& value, RealScalar prec) const
 {
-  for(int j = 0; j < cols(); j++)
-    for(int i = 0; i < rows(); i++)
+  for(int j = 0; j < cols(); ++j)
+    for(int i = 0; i < rows(); ++i)
      if(!ei_isApprox(coeff(i, j), value, prec))
        return false;
  return true;
 }

-/** Sets all coefficients in this expression to \a value.
+/** This is just an alias for isApproxToConstant().
  *
-  * \sa class CwiseNullaryOp, Zero(), Ones()
+  * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
+template<typename Derived>
+bool MatrixBase<Derived>::isConstant
+(const Scalar& value, RealScalar prec) const
+{
+  return isApproxToConstant(value, prec);
+}
+
+/** Alias for setConstant(): sets all coefficients in this expression to \a value.
+  *
+  * \sa setConstant(), Constant(), class CwiseNullaryOp
  */
 template<typename Derived>
-Derived& MatrixBase<Derived>::setConstant(const Scalar& value)
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::fill(const Scalar& value)
+{
+  setConstant(value);
+}
+
+/** Sets all coefficients in this expression to \a value.
+  *
+  * \sa fill(), setConstant(int,const Scalar&), setConstant(int,int,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setConstant(const Scalar& value)
 {
  return derived() = Constant(rows(), cols(), value);
 }

+/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include Matrix_setConstant_int.cpp
+  * Output: \verbinclude Matrix_setConstant_int.out
+  *
+  * \sa MatrixBase::setConstant(const Scalar&), setConstant(int,int,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setConstant(int size, const Scalar& value)
+{
+  resize(size);
+  return setConstant(value);
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to the given \a value.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setConstant_int_int.cpp
+  * Output: \verbinclude Matrix_setConstant_int_int.out
+  *
+  * \sa MatrixBase::setConstant(const Scalar&), setConstant(int,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setConstant(int rows, int cols, const Scalar& value)
+{
+  resize(rows, cols);
+  return setConstant(value);
+}
+
+
 // zero:

 /** \returns an expression of a zero matrix.
@@ -272,7 +325,7 @@ Derived& MatrixBase<Derived>::setConstant(const Scalar& value)
  * \sa Zero(), Zero(int)
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Zero(int rows, int cols)
 {
  return Constant(rows, cols, Scalar(0));
@@ -295,7 +348,7 @@ MatrixBase<Derived>::Zero(int rows, int cols)
  * \sa Zero(), Zero(int,int)
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Zero(int size)
 {
  return Constant(size, Scalar(0));
@@ -312,7 +365,7 @@ MatrixBase<Derived>::Zero(int size)
  * \sa Zero(int), Zero(int,int)
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Zero()
 {
  return Constant(Scalar(0));
@@ -327,11 +380,10 @@ MatrixBase<Derived>::Zero()
  * \sa class CwiseNullaryOp, Zero()
  */
 template<typename Derived>
-bool MatrixBase<Derived>::isZero
-(RealScalar prec) const
+bool MatrixBase<Derived>::isZero(RealScalar prec) const
 {
-  for(int j = 0; j < cols(); j++)
-    for(int i = 0; i < rows(); i++)
+  for(int j = 0; j < cols(); ++j)
+    for(int i = 0; i < rows(); ++i)
      if(!ei_isMuchSmallerThan(coeff(i, j), static_cast<Scalar>(1), prec))
        return false;
  return true;
@@ -345,11 +397,46 @@ bool MatrixBase<Derived>::isZero
  * \sa class CwiseNullaryOp, Zero()
  */
 template<typename Derived>
-Derived& MatrixBase<Derived>::setZero()
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setZero()
 {
  return setConstant(Scalar(0));
 }

+/** Resizes to the given \a size, and sets all coefficients in this expression to zero.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include Matrix_setZero_int.cpp
+  * Output: \verbinclude Matrix_setZero_int.out
+  *
+  * \sa MatrixBase::setZero(), setZero(int,int), class CwiseNullaryOp, MatrixBase::Zero()
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setZero(int size)
+{
+  resize(size);
+  return setConstant(Scalar(0));
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to zero.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setZero_int_int.cpp
+  * Output: \verbinclude Matrix_setZero_int_int.out
+  *
+  * \sa MatrixBase::setZero(), setZero(int), class CwiseNullaryOp, MatrixBase::Zero()
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setZero(int rows, int cols)
+{
+  resize(rows, cols);
+  return setConstant(Scalar(0));
+}
+
 // ones:

 /** \returns an expression of a matrix where all coefficients equal one.
@@ -369,7 +456,7 @@ Derived& MatrixBase<Derived>::setZero()
  * \sa Ones(), Ones(int), isOnes(), class Ones
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Ones(int rows, int cols)
 {
  return Constant(rows, cols, Scalar(1));
@@ -392,7 +479,7 @@ MatrixBase<Derived>::Ones(int rows, int cols)
  * \sa Ones(), Ones(int,int), isOnes(), class Ones
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Ones(int size)
 {
  return Constant(size, Scalar(1));
@@ -409,7 +496,7 @@ MatrixBase<Derived>::Ones(int size)
  * \sa Ones(int), Ones(int,int), isOnes(), class Ones
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::ConstantReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ConstantReturnType
 MatrixBase<Derived>::Ones()
 {
  return Constant(Scalar(1));
@@ -438,11 +525,46 @@ bool MatrixBase<Derived>::isOnes
  * \sa class CwiseNullaryOp, Ones()
  */
 template<typename Derived>
-Derived& MatrixBase<Derived>::setOnes()
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setOnes()
 {
  return setConstant(Scalar(1));
 }

+/** Resizes to the given \a size, and sets all coefficients in this expression to one.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include Matrix_setOnes_int.cpp
+  * Output: \verbinclude Matrix_setOnes_int.out
+  *
+  * \sa MatrixBase::setOnes(), setOnes(int,int), class CwiseNullaryOp, MatrixBase::Ones()
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setOnes(int size)
+{
+  resize(size);
+  return setConstant(Scalar(1));
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to one.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setOnes_int_int.cpp
+  * Output: \verbinclude Matrix_setOnes_int_int.out
+  *
+  * \sa MatrixBase::setOnes(), setOnes(int), class CwiseNullaryOp, MatrixBase::Ones()
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setOnes(int rows, int cols)
+{
+  resize(rows, cols);
+  return setConstant(Scalar(1));
+}
+
 // Identity:

 /** \returns an expression of the identity matrix (not necessarily square).
@@ -462,7 +584,7 @@ Derived& MatrixBase<Derived>::setOnes()
  * \sa Identity(), setIdentity(), isIdentity()
  */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity(int rows, int cols)
 {
  return NullaryExpr(rows, cols, ei_scalar_identity_op<Scalar>());
@@ -479,7 +601,7 @@ MatrixBase<Derived>::Identity(int rows, int cols)
  * \sa Identity(int,int), setIdentity(), isIdentity()
  */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::IdentityReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
 MatrixBase<Derived>::Identity()
 {
  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@@ -499,9 +621,9 @@ template<typename Derived>
 bool MatrixBase<Derived>::isIdentity
 (RealScalar prec) const
 {
-  for(int j = 0; j < cols(); j++)
+  for(int j = 0; j < cols(); ++j)
  {
-    for(int i = 0; i < rows(); i++)
+    for(int i = 0; i < rows(); ++i)
    {
      if(i == j)
      {
@@ -521,7 +643,7 @@ bool MatrixBase<Derived>::isIdentity
 template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
 struct ei_setIdentity_impl
 {
-  static inline Derived& run(Derived& m)
+  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    return m = Derived::Identity(m.rows(), m.cols());
  }
@@ -530,11 +652,11 @@ struct ei_setIdentity_impl
 template<typename Derived>
 struct ei_setIdentity_impl<Derived, true>
 {
-  static inline Derived& run(Derived& m)
+  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
  {
    m.setZero();
    const int size = std::min(m.rows(), m.cols());
-    for(int i = 0; i < size; i++) m.coeffRef(i,i) = typename Derived::Scalar(1);
+    for(int i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
    return m;
  }
 };
@@ -547,11 +669,29 @@ struct ei_setIdentity_impl<Derived, true>
  * \sa class CwiseNullaryOp, Identity(), Identity(int,int), isIdentity()
  */
 template<typename Derived>
-inline Derived& MatrixBase<Derived>::setIdentity()
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
 {
  return ei_setIdentity_impl<Derived>::run(derived());
 }

+/** Resizes to the given size, and writes the identity expression (not necessarily square) into *this.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setIdentity_int_int.cpp
+  * Output: \verbinclude Matrix_setIdentity_int_int.out
+  *
+  * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+EIGEN_STRONG_INLINE Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::setIdentity(int rows, int cols)
+{
+  resize(rows, cols);
+  return setIdentity();
+}
+
 /** \returns an expression of the i-th unit (basis) vector.
  *
  * \only_for_vectors
@@ -559,9 +699,9 @@ inline Derived& MatrixBase<Derived>::setIdentity()
  * \sa MatrixBase::Unit(int), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(int size, int i)
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(int size, int i)
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return BasisReturnType(SquareMatrixType::Identity(size,size), i);
 }

@@ -574,9 +714,9 @@ const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(in
  * \sa MatrixBase::Unit(int,int), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(int i)
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(int i)
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return BasisReturnType(SquareMatrixType::Identity(),i);
 }

@@ -587,7 +727,7 @@ const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(in
  * \sa MatrixBase::Unit(int,int), MatrixBase::Unit(int), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
 { return Derived::Unit(0); }

 /** \returns an expression of the Y axis unit vector (0,1{,0}^*)
@@ -597,7 +737,7 @@ const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
  * \sa MatrixBase::Unit(int,int), MatrixBase::Unit(int), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
 { return Derived::Unit(1); }

 /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
@@ -607,7 +747,7 @@ const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
  * \sa MatrixBase::Unit(int,int), MatrixBase::Unit(int), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
 { return Derived::Unit(2); }

 /** \returns an expression of the W axis unit vector (0,0,0,1)
@@ -617,7 +757,7 @@ const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
  * \sa MatrixBase::Unit(int,int), MatrixBase::Unit(int), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
  */
 template<typename Derived>
-const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
 { return Derived::Unit(3); }

 #endif // EIGEN_CWISE_NULLARY_OP_H
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -41,6 +41,7 @@
  */
 template<typename UnaryOp, typename MatrixType>
 struct ei_traits<CwiseUnaryOp<UnaryOp, MatrixType> >
+ : ei_traits<MatrixType>
 {
  typedef typename ei_result_of<
                     UnaryOp(typename MatrixType::Scalar)
@@ -48,16 +49,10 @@ struct ei_traits<CwiseUnaryOp<UnaryOp, MatrixType> >
  typedef typename MatrixType::Nested MatrixTypeNested;
  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
-    MatrixTypeCoeffReadCost = _MatrixTypeNested::CoeffReadCost,
-    MatrixTypeFlags = _MatrixTypeNested::Flags,
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Flags = (MatrixTypeFlags & (
+    Flags = (_MatrixTypeNested::Flags & (
      HereditaryBits | LinearAccessBit | AlignedBit
      | (ei_functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))),
-    CoeffReadCost = MatrixTypeCoeffReadCost + ei_functor_traits<UnaryOp>::Cost
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost + ei_functor_traits<UnaryOp>::Cost
  };
 };

@@ -69,32 +64,30 @@ class CwiseUnaryOp : ei_no_assignment_operator,

    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)

-    class InnerIterator;
-
    inline CwiseUnaryOp(const MatrixType& mat, const UnaryOp& func = UnaryOp())
      : m_matrix(mat), m_functor(func) {}

-    inline int rows() const { return m_matrix.rows(); }
-    inline int cols() const { return m_matrix.cols(); }
+    EIGEN_STRONG_INLINE int rows() const { return m_matrix.rows(); }
+    EIGEN_STRONG_INLINE int cols() const { return m_matrix.cols(); }

-    inline const Scalar coeff(int row, int col) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int row, int col) const
    {
      return m_functor(m_matrix.coeff(row, col));
    }

    template<int LoadMode>
-    inline PacketScalar packet(int row, int col) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int row, int col) const
    {
      return m_functor.packetOp(m_matrix.template packet<LoadMode>(row, col));
    }

-    inline const Scalar coeff(int index) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int index) const
    {
      return m_functor(m_matrix.coeff(index));
    }

    template<int LoadMode>
-    inline PacketScalar packet(int index) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int index) const
    {
      return m_functor.packetOp(m_matrix.template packet<LoadMode>(index));
    }
@@ -119,7 +112,7 @@ class CwiseUnaryOp : ei_no_assignment_operator,
  */
 template<typename Derived>
 template<typename CustomUnaryOp>
-inline const CwiseUnaryOp<CustomUnaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseUnaryOp<CustomUnaryOp, Derived>
 MatrixBase<Derived>::unaryExpr(const CustomUnaryOp& func) const
 {
  return CwiseUnaryOp<CustomUnaryOp, Derived>(derived(), func);
@@ -128,7 +121,7 @@ MatrixBase<Derived>::unaryExpr(const CustomUnaryOp& func) const
 /** \returns an expression of the opposite of \c *this
  */
 template<typename Derived>
-inline const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<Derived>::Scalar>,Derived>
+EIGEN_STRONG_INLINE const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits<Derived>::Scalar>,Derived>
 MatrixBase<Derived>::operator-() const
 {
  return derived();
@@ -142,7 +135,7 @@ MatrixBase<Derived>::operator-() const
  * \sa abs2()
  */
 template<typename ExpressionType>
-inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_abs_op)
+EIGEN_STRONG_INLINE const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_abs_op)
 Cwise<ExpressionType>::abs() const
 {
  return _expression();
@@ -156,7 +149,7 @@ Cwise<ExpressionType>::abs() const
  * \sa abs(), square()
  */
 template<typename ExpressionType>
-inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_abs2_op)
+EIGEN_STRONG_INLINE const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_abs2_op)
 Cwise<ExpressionType>::abs2() const
 {
  return _expression();
@@ -166,7 +159,7 @@ Cwise<ExpressionType>::abs2() const
  *
  * \sa adjoint() */
 template<typename Derived>
-inline typename MatrixBase<Derived>::ConjugateReturnType
+EIGEN_STRONG_INLINE typename MatrixBase<Derived>::ConjugateReturnType
 MatrixBase<Derived>::conjugate() const
 {
  return ConjugateReturnType(derived());
@@ -174,13 +167,17 @@ MatrixBase<Derived>::conjugate() const

 /** \returns an expression of the real part of \c *this.
  *
-  * \sa adjoint() */
+  * \sa imag() */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::RealReturnType
-MatrixBase<Derived>::real() const
-{
-  return derived();
-}
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::RealReturnType
+MatrixBase<Derived>::real() const { return derived(); }
+
+/** \returns an expression of the imaginary part of \c *this.
+  *
+  * \sa real() */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ImagReturnType
+MatrixBase<Derived>::imag() const { return derived(); }

 /** \returns an expression of *this with the \a Scalar type casted to
  * \a NewScalar.
@@ -191,7 +188,11 @@ MatrixBase<Derived>::real() const
  */
 template<typename Derived>
 template<typename NewType>
-inline const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
+EIGEN_STRONG_INLINE
+typename ei_cast_return_type<
+            Derived,
+            const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
+          >::type
 MatrixBase<Derived>::cast() const
 {
  return derived();
@@ -199,7 +200,7 @@ MatrixBase<Derived>::cast() const

 /** \relates MatrixBase */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::ScalarMultipleReturnType
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::ScalarMultipleReturnType
 MatrixBase<Derived>::operator*(const Scalar& scalar) const
 {
  return CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived>
@@ -208,7 +209,7 @@ MatrixBase<Derived>::operator*(const Scalar& scalar) const

 /** \relates MatrixBase */
 template<typename Derived>
-inline const CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>, Derived>
+EIGEN_STRONG_INLINE const CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>, Derived>
 MatrixBase<Derived>::operator/(const Scalar& scalar) const
 {
  return CwiseUnaryOp<ei_scalar_quotient1_op<Scalar>, Derived>
@@ -216,14 +217,14 @@ MatrixBase<Derived>::operator/(const Scalar& scalar) const
 }

 template<typename Derived>
-inline Derived&
+EIGEN_STRONG_INLINE Derived&
 MatrixBase<Derived>::operator*=(const Scalar& other)
 {
  return *this = *this * other;
 }

 template<typename Derived>
-inline Derived&
+EIGEN_STRONG_INLINE Derived&
 MatrixBase<Derived>::operator/=(const Scalar& other)
 {
  return *this = *this / other;
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -0,0 +1,187 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_DIAGONAL_H
+#define EIGEN_DIAGONAL_H
+
+/** \class Diagonal
+  *
+  * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
+  *
+  * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
+  * \param Index the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
+  *              A positive value means a superdiagonal, a negative value means a subdiagonal.
+  *              You can also use Dynamic so the index can be set at runtime.
+  *
+  * The matrix is not required to be square.
+  *
+  * This class represents an expression of the main diagonal, or any sub/super diagonal
+  * of a matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(int) and most of the
+  * time this is the only way it is used.
+  *
+  * \sa MatrixBase::diagonal(), MatrixBase::diagonal(int)
+  */
+template<typename MatrixType, int Index>
+struct ei_traits<Diagonal<MatrixType,Index> >
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    AbsIndex = Index<0 ? -Index : Index, // only used if Index != Dynamic
+    RowsAtCompileTime = (int(Index) == Dynamic || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
+                      : (EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime,
+                                        MatrixType::ColsAtCompileTime) - AbsIndex),
+    ColsAtCompileTime = 1, // the diagonal is exposed as a single column
+    MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
+                         : (EIGEN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime)
+                            - (int(Index) == Dynamic ? 0 : int(AbsIndex))), // runtime index: AbsIndex is meaningless, keep the main-diagonal bound
+    MaxColsAtCompileTime = 1,
+    Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit), // keep only these bits of the nested flags
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost // reading a coefficient costs the same as in the nested expression
+  };
+};
+
+template<typename MatrixType, int Index> class Diagonal
+   : public MatrixBase<Diagonal<MatrixType, Index> >
+{
+    // some compilers may fail to optimize std::max etc in case of compile-time constants...
+    EIGEN_STRONG_INLINE int absIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
+    EIGEN_STRONG_INLINE int rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
+    EIGEN_STRONG_INLINE int colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
+    
+  public:
+
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Diagonal)
+    // Wraps \a matrix; \a index defaults to Index (presumably only honoured when Index == Dynamic - confirm against ei_int_if_dynamic).
+    inline Diagonal(const MatrixType& matrix, int index = Index) : m_matrix(matrix), m_index(index) {}
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
+    // NOTE(review): if diagonalSize() is min(rows, cols), this under-counts for some non-square matrices (e.g. 2x5, index 1) - confirm.
+    inline int rows() const{ return m_matrix.diagonalSize() - absIndex(); }
+    inline int cols() const { return 1; }
+    // Two-index accessors: the second (column) argument is ignored; writes go through const_cast_derived() since m_matrix is held const.
+    inline Scalar& coeffRef(int row, int)
+    {
+      return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+    }
+
+    inline const Scalar coeff(int row, int) const
+    {
+      return m_matrix.coeff(row+rowOffset(), row+colOffset());
+    }
+    // Single-index accessors: element \a index of the diagonal maps to (index+rowOffset(), index+colOffset()) in the matrix.
+    inline Scalar& coeffRef(int index)
+    {
+      return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
+    }
+
+    inline const Scalar coeff(int index) const
+    {
+      return m_matrix.coeff(index+rowOffset(), index+colOffset());
+    }
+
+  protected:
+    const typename MatrixType::Nested m_matrix; // the matrix expression whose diagonal is taken
+    const ei_int_if_dynamic<Index> m_index; // diagonal index: >0 shifts columns, otherwise shifts rows (see rowOffset/colOffset)
+};
+
+/** \returns an expression of the main diagonal of the matrix \c *this
+  *
+  * \c *this is not required to be square.
+  *
+  * Example: \include MatrixBase_diagonal.cpp
+  * Output: \verbinclude MatrixBase_diagonal.out
+  *
+  * \sa class Diagonal */
+template<typename Derived>
+inline Diagonal<Derived, 0>
+MatrixBase<Derived>::diagonal()
+{
+  return Diagonal<Derived, 0>(derived());
+}
+
+/** This is the const version of diagonal(). */
+template<typename Derived>
+inline const Diagonal<Derived, 0>
+MatrixBase<Derived>::diagonal() const
+{
+  return Diagonal<Derived, 0>(derived());
+}
+
+/** \returns an expression of the \a index-th sub or super diagonal of the matrix \c *this
+  *
+  * \c *this is not required to be square.
+  *
+  * The parameter \a index represents a super diagonal if \a index > 0
+  * and a sub diagonal if \a index < 0. \a index == 0 is equivalent to the main diagonal.
+  *
+  * Example: \include MatrixBase_diagonal_int.cpp
+  * Output: \verbinclude MatrixBase_diagonal_int.out
+  *
+  * \sa MatrixBase::diagonal(), class Diagonal */
+template<typename Derived>
+inline Diagonal<Derived, Dynamic>
+MatrixBase<Derived>::diagonal(int index)
+{
+  return Diagonal<Derived, Dynamic>(derived(), index);
+}
+
+/** This is the const version of diagonal(int). */
+template<typename Derived>
+inline const Diagonal<Derived, Dynamic>
+MatrixBase<Derived>::diagonal(int index) const
+{
+  return Diagonal<Derived, Dynamic>(derived(), index);
+}
+
+/** \returns an expression of the \a Index-th sub or super diagonal of the matrix \c *this
+  *
+  * \c *this is not required to be square.
+  *
+  * The template parameter \a Index represents a super diagonal if \a Index > 0
+  * and a sub diagonal if \a Index < 0. \a Index == 0 is equivalent to the main diagonal.
+  *
+  * Example: \include MatrixBase_diagonal_template_int.cpp
+  * Output: \verbinclude MatrixBase_diagonal_template_int.out
+  *
+  * \sa MatrixBase::diagonal(), class Diagonal */
+template<typename Derived>
+template<int Index>
+inline Diagonal<Derived,Index>
+MatrixBase<Derived>::diagonal()
+{
+  return Diagonal<Derived,Index>(derived());
+}
+
+/** This is the const version of diagonal<int>(). */
+template<typename Derived>
+template<int Index>
+inline const Diagonal<Derived,Index>
+MatrixBase<Derived>::diagonal() const
+{
+  return Diagonal<Derived,Index>(derived());
+}
+
+#endif // EIGEN_DIAGONAL_H
--- a/Eigen/src/Core/DiagonalCoeffs.h
+++ b/Eigen/src/Core/DiagonalCoeffs.h
@@ -1,124 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
-//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
-//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
-
-#ifndef EIGEN_DIAGONALCOEFFS_H
-#define EIGEN_DIAGONALCOEFFS_H
-
-/** \class DiagonalCoeffs
-  *
-  * \brief Expression of the main diagonal of a matrix
-  *
-  * \param MatrixType the type of the object in which we are taking the main diagonal
-  *
-  * The matrix is not required to be square.
-  *
-  * This class represents an expression of the main diagonal of a square matrix.
-  * It is the return type of MatrixBase::diagonal() and most of the time this is
-  * the only way it is used.
-  *
-  * \sa MatrixBase::diagonal()
-  */
-template<typename MatrixType>
-struct ei_traits<DiagonalCoeffs<MatrixType> >
-{
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
-  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
-  enum {
-    RowsAtCompileTime = int(MatrixType::SizeAtCompileTime) == Dynamic ? Dynamic
-                      : EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime,
-                                       MatrixType::ColsAtCompileTime),
-    ColsAtCompileTime = 1,
-    MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
-                            : EIGEN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime,
-                                             MatrixType::MaxColsAtCompileTime),
-    MaxColsAtCompileTime = 1,
-    Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit),
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
-  };
-};
-
-template<typename MatrixType> class DiagonalCoeffs
-   : public MatrixBase<DiagonalCoeffs<MatrixType> >
-{
-  public:
-
-    EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalCoeffs)
-
-    inline DiagonalCoeffs(const MatrixType& matrix) : m_matrix(matrix) {}
-
-    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(DiagonalCoeffs)
-
-    inline int rows() const { return std::min(m_matrix.rows(), m_matrix.cols()); }
-    inline int cols() const { return 1; }
-
-    inline Scalar& coeffRef(int row, int)
-    {
-      return m_matrix.const_cast_derived().coeffRef(row, row);
-    }
-
-    inline const Scalar coeff(int row, int) const
-    {
-      return m_matrix.coeff(row, row);
-    }
-
-    inline Scalar& coeffRef(int index)
-    {
-      return m_matrix.const_cast_derived().coeffRef(index, index);
-    }
-
-    inline const Scalar coeff(int index) const
-    {
-      return m_matrix.coeff(index, index);
-    }
-
-  protected:
-
-    const typename MatrixType::Nested m_matrix;
-};
-
-/** \returns an expression of the main diagonal of the matrix \c *this
-  *
-  * \c *this is not required to be square.
-  *
-  * Example: \include MatrixBase_diagonal.cpp
-  * Output: \verbinclude MatrixBase_diagonal.out
-  *
-  * \sa class DiagonalCoeffs */
-template<typename Derived>
-inline DiagonalCoeffs<Derived>
-MatrixBase<Derived>::diagonal()
-{
-  return DiagonalCoeffs<Derived>(derived());
-}
-
-/** This is the const version of diagonal(). */
-template<typename Derived>
-inline const DiagonalCoeffs<Derived>
-MatrixBase<Derived>::diagonal() const
-{
-  return DiagonalCoeffs<Derived>(derived());
-}
-
-#endif // EIGEN_DIAGONALCOEFFS_H
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -1,7 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -25,47 +26,92 @@
 #ifndef EIGEN_DIAGONALMATRIX_H
 #define EIGEN_DIAGONALMATRIX_H

-/** \class DiagonalMatrix
-  *
-  * \brief Expression of a diagonal matrix
-  *
-  * \param CoeffsVectorType the type of the vector of diagonal coefficients
-  *
-  * This class is an expression of a diagonal matrix with given vector of diagonal
-  * coefficients. It is the return
-  * type of MatrixBase::diagonal(const OtherDerived&) and most of the time this is
-  * the only way it is used.
-  *
-  * \sa MatrixBase::diagonal(const OtherDerived&)
-  */
-template<typename CoeffsVectorType>
-struct ei_traits<DiagonalMatrix<CoeffsVectorType> >
-{
-  typedef typename CoeffsVectorType::Scalar Scalar;
-  typedef typename ei_nested<CoeffsVectorType>::type CoeffsVectorTypeNested;
-  typedef typename ei_unref<CoeffsVectorTypeNested>::type _CoeffsVectorTypeNested;
-  enum {
-    RowsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
-    ColsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
-    MaxColsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
-    Flags = (_CoeffsVectorTypeNested::Flags & HereditaryBits) | Diagonal,
-    CoeffReadCost = _CoeffsVectorTypeNested::CoeffReadCost
-  };
-};

-template<typename CoeffsVectorType>
-class DiagonalMatrix : ei_no_assignment_operator,
-   public MatrixBase<DiagonalMatrix<CoeffsVectorType> >
+template<typename CoeffsVectorType, typename Derived>
+class DiagonalMatrixBase : ei_no_assignment_operator,
+   public MatrixBase<Derived>
 {
  public:
+    typedef MatrixBase<Derived> Base;
+    typedef typename ei_traits<Derived>::Scalar Scalar;
+    typedef typename Base::PacketScalar PacketScalar;
+    using Base::derived;
+    typedef typename ei_cleantype<CoeffsVectorType>::type _CoeffsVectorType;

-    EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalMatrix)
+  protected:

-    inline DiagonalMatrix(const CoeffsVectorType& coeffs) : m_coeffs(coeffs)
+    // MSVC gets crazy if we define default parameters
+    template<typename OtherDerived, bool IsVector, bool IsDiagonal> struct construct_from_expression;
+
+    // = vector
+    template<typename OtherDerived>
+    struct construct_from_expression<OtherDerived,true,false>
    {
-      ei_assert(CoeffsVectorType::IsVectorAtCompileTime
-          && coeffs.size() > 0);
+      static void run(Derived& dst, const OtherDerived& src)
+      { dst.diagonal() = src; }
+    };
+
+    // = diagonal expression
+    template<typename OtherDerived, bool IsVector>
+    struct construct_from_expression<OtherDerived,IsVector,true>
+    {
+      static void run(Derived& dst, const OtherDerived& src)
+      { dst.diagonal() = src.diagonal(); }
+    };
+
+    /** Default constructor without initialization */
+    inline DiagonalMatrixBase() {}
+    /** Constructs a diagonal matrix with given dimension */
+    inline DiagonalMatrixBase(int dim) : m_coeffs(dim) {}
+    /** Generic constructor from an expression */
+    template<typename OtherDerived>
+    inline DiagonalMatrixBase(const MatrixBase<OtherDerived>& other)
+    {
+      construct_from_expression<OtherDerived,OtherDerived::IsVectorAtCompileTime,ei_is_diagonal<OtherDerived>::ret>
+        ::run(derived(),other.derived());
+    }
+    
+    template<typename NewType,int dummy=0>
+    struct cast_selector {
+      typedef const DiagonalMatrixWrapper<NestByValue<CwiseUnaryOp<ei_scalar_cast_op<Scalar, NewType>, _CoeffsVectorType> > > return_type;
+      inline static return_type run(const DiagonalMatrixBase& d) {
+        return d.m_coeffs.template cast<NewType>().nestByValue().asDiagonal();
+      }
+    };
+    
+    template<int dummy>
+    struct cast_selector<Scalar,dummy> {
+      typedef const Derived& return_type;
+      inline static return_type run(const DiagonalMatrixBase& d) {
+        return d.derived();
+      }
+    };
+
+  public:
+
+    inline DiagonalMatrixBase(const _CoeffsVectorType& coeffs) : m_coeffs(coeffs)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(_CoeffsVectorType);
+      ei_assert(coeffs.size() > 0);
+    }
+
+    template<typename NewType>
+    inline typename cast_selector<NewType,0>::return_type
+    cast() const
+    {
+      return cast_selector<NewType,0>::run(*this);
+    }
+
+    /** Assignment operator.
+      * The right-hand-side \a other must be either a vector representing the diagonal
+      * coefficients or a diagonal matrix expression. \returns a reference to the derived object.
+      */
+    template<typename OtherDerived>
+    inline Derived& operator=(const MatrixBase<OtherDerived>& other)
+    {
+      construct_from_expression<OtherDerived,OtherDerived::IsVectorAtCompileTime,ei_is_diagonal<OtherDerived>::ret>
+        ::run(derived(),other.derived()); // run() takes the concrete OtherDerived, as in the generic constructor
+      return derived();
+    }

    inline int rows() const { return m_coeffs.size(); }
@@ -76,11 +122,154 @@ class DiagonalMatrix : ei_no_assignment_operator,
      return row == col ? m_coeffs.coeff(row) : static_cast<Scalar>(0);
    }

+    inline Scalar& coeffRef(int row, int col)
+    {
+      ei_assert(row==col);
+      return m_coeffs.coeffRef(row);
+    }
+
+    inline _CoeffsVectorType& diagonal() { return m_coeffs; }
+    inline const _CoeffsVectorType& diagonal() const { return m_coeffs; }
+
  protected:
-    const typename CoeffsVectorType::Nested m_coeffs;
+    CoeffsVectorType m_coeffs;
 };

-/** \returns an expression of a diagonal matrix with *this as vector of diagonal coefficients
+/** \class DiagonalMatrix
+  * \nonstableyet
+  *
+  * \brief Represent a diagonal matrix with its storage
+  *
+  * \param _Scalar the type of coefficients
+  * \param _Size the dimension of the matrix
+  *
+  * \sa class Matrix
+  */
+template<typename _Scalar,int _Size>
+struct ei_traits<DiagonalMatrix<_Scalar,_Size> > : ei_traits<Matrix<_Scalar,_Size,_Size> >
+{
+  enum {
+    Flags = (ei_traits<Matrix<_Scalar,_Size,_Size> >::Flags & HereditaryBits) | DiagonalBits
+  };
+};
+
+template<typename _Scalar, int _Size>
+class DiagonalMatrix
+  : public DiagonalMatrixBase<Matrix<_Scalar,_Size,1>, DiagonalMatrix<_Scalar,_Size> >
+{
+  public:
+    EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalMatrix)
+    typedef DiagonalMatrixBase<Matrix<_Scalar,_Size,1>, DiagonalMatrix<_Scalar,_Size> > DiagonalBase;
+
+  protected:
+    typedef Matrix<_Scalar,_Size,1> CoeffVectorType;
+    using DiagonalBase::m_coeffs;
+
+  public:
+
+    /** Default constructor without initialization */
+    inline DiagonalMatrix() : DiagonalBase()
+    {}
+
+    /** Constructs a diagonal matrix with given dimension  */
+    inline DiagonalMatrix(int dim) : DiagonalBase(dim)
+    {}
+
+    /** 2D only */
+    inline DiagonalMatrix(const Scalar& sx, const Scalar& sy)
+    {
+      EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(DiagonalMatrix,2,2);
+      m_coeffs.x() = sx;
+      m_coeffs.y() = sy;
+    }
+    /** 3D only */
+    inline DiagonalMatrix(const Scalar& sx, const Scalar& sy, const Scalar& sz)
+    {
+      EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(DiagonalMatrix,3,3);
+      m_coeffs.x() = sx;
+      m_coeffs.y() = sy;
+      m_coeffs.z() = sz;
+    }
+
+    /** copy constructor */
+    inline DiagonalMatrix(const DiagonalMatrix& other) : DiagonalBase(other.m_coeffs)
+    {}
+
+    /** generic constructor from expression */
+    template<typename OtherDerived>
+    explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : DiagonalBase(other)
+    {}
+
+    DiagonalMatrix& operator=(const DiagonalMatrix& other)
+    {
+      m_coeffs = other.m_coeffs;
+      return *this;
+    }
+
+    template<typename OtherDerived>
+    DiagonalMatrix& operator=(const MatrixBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT(ei_is_diagonal<OtherDerived>::ret, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX);
+      m_coeffs = other.diagonal();
+      return *this;
+    }
+    
+    inline void resize(int size)
+    {
+      m_coeffs.resize(size);
+    }
+    
+    inline void resize(int rows, int cols)
+    {
+      ei_assert(rows==cols && "a diagonal matrix must be square");
+      m_coeffs.resize(rows);
+    }
+    
+    inline void setZero() { m_coeffs.setZero(); }
+};
+
+/** \class DiagonalMatrixWrapper
+  * \nonstableyet
+  *
+  * \brief Expression of a diagonal matrix
+  *
+  * \param CoeffsVectorType the type of the vector of diagonal coefficients
+  *
+  * This class is an expression of a diagonal matrix with given vector of diagonal
+  * coefficients. It is the return type of MatrixBase::asDiagonal()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa class DiagonalMatrixBase, class DiagonalMatrix, MatrixBase::asDiagonal()
+  */
+template<typename CoeffsVectorType>
+struct ei_traits<DiagonalMatrixWrapper<CoeffsVectorType> >
+{
+  typedef typename CoeffsVectorType::Scalar Scalar;
+  typedef typename ei_nested<CoeffsVectorType>::type CoeffsVectorTypeNested;
+  typedef typename ei_unref<CoeffsVectorTypeNested>::type _CoeffsVectorTypeNested;
+  enum {
+    RowsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
+    ColsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
+    MaxRowsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
+    Flags = (_CoeffsVectorTypeNested::Flags & HereditaryBits) | DiagonalBits,
+    CoeffReadCost = _CoeffsVectorTypeNested::CoeffReadCost
+  };
+};
+template<typename CoeffsVectorType>
+class DiagonalMatrixWrapper
+  : public DiagonalMatrixBase<typename CoeffsVectorType::Nested, DiagonalMatrixWrapper<CoeffsVectorType> >
+{
+    typedef typename CoeffsVectorType::Nested CoeffsVectorTypeNested;
+    typedef DiagonalMatrixBase<CoeffsVectorTypeNested, DiagonalMatrixWrapper<CoeffsVectorType> > DiagonalBase;
+  public:
+    EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalMatrixWrapper)
+    inline DiagonalMatrixWrapper(const CoeffsVectorType& coeffs) : DiagonalBase(coeffs)
+    {}
+};
+
+/** \nonstableyet
+  * \returns an expression of a diagonal matrix with *this as vector of diagonal coefficients
  *
  * \only_for_vectors
  *
@@ -92,13 +281,14 @@ class DiagonalMatrix : ei_no_assignment_operator,
  * \sa class DiagonalMatrix, isDiagonal()
  **/
 template<typename Derived>
-inline const DiagonalMatrix<Derived>
+inline const DiagonalMatrixWrapper<Derived>
 MatrixBase<Derived>::asDiagonal() const
 {
  return derived();
 }

-/** \returns true if *this is approximately equal to a diagonal matrix,
+/** \nonstableyet
+  * \returns true if *this is approximately equal to a diagonal matrix,
  *          within the precision given by \a prec.
  *
  * Example: \include MatrixBase_isDiagonal.cpp
@@ -112,13 +302,13 @@ bool MatrixBase<Derived>::isDiagonal
 {
  if(cols() != rows()) return false;
  RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
-  for(int j = 0; j < cols(); j++)
+  for(int j = 0; j < cols(); ++j)
  {
    RealScalar absOnDiagonal = ei_abs(coeff(j,j));
    if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
  }
-  for(int j = 0; j < cols(); j++)
-    for(int i = 0; i < j; i++)
+  for(int j = 0; j < cols(); ++j)
+    for(int i = 0; i < j; ++i)
    {
      if(!ei_isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;
      if(!ei_isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,12 +26,31 @@
 #ifndef EIGEN_DIAGONALPRODUCT_H
 #define EIGEN_DIAGONALPRODUCT_H

+/** \internal Specialization of ei_nested for DiagonalMatrix.
+ *  Unlike ei_nested, if the argument is a DiagonalMatrix and if it must be evaluated,
+ *  then it is evaluated to a DiagonalMatrix having its own argument evaluated.
+ */
+template<typename T, int N, bool IsDiagonal = ei_is_diagonal<T>::ret> struct ei_nested_diagonal : ei_nested<T,N> {};
+template<typename T, int N> struct ei_nested_diagonal<T,N,true>
+ : ei_nested<T, N, DiagonalMatrix<typename T::Scalar, EIGEN_ENUM_MIN(T::RowsAtCompileTime,T::ColsAtCompileTime)> >
+{};
+
+// specialization of ProductReturnType
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,DiagonalProduct>
+{
+  typedef typename ei_nested_diagonal<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+  typedef typename ei_nested_diagonal<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+
+  typedef Product<LhsNested, RhsNested, DiagonalProduct> Type;
+};
+
 template<typename LhsNested, typename RhsNested>
 struct ei_traits<Product<LhsNested, RhsNested, DiagonalProduct> >
 {
  // clean the nested types:
-  typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
-  typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
+  typedef typename ei_cleantype<LhsNested>::type _LhsNested;
+  typedef typename ei_cleantype<RhsNested>::type _RhsNested;
  typedef typename _LhsNested::Scalar Scalar;

  enum {
@@ -42,8 +61,8 @@ struct ei_traits<Product<LhsNested, RhsNested, DiagonalProduct> >
    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,

-    LhsIsDiagonal = (_LhsNested::Flags&Diagonal)==Diagonal,
-    RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal,
+    LhsIsDiagonal = ei_is_diagonal<_LhsNested>::ret,
+    RhsIsDiagonal = ei_is_diagonal<_RhsNested>::ret,

    CanVectorizeRhs =  (!RhsIsDiagonal) && (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit)
                     && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
@@ -54,7 +73,7 @@ struct ei_traits<Product<LhsNested, RhsNested, DiagonalProduct> >
    RemovedBits = ~((RhsFlags & RowMajorBit) && (!CanVectorizeLhs) ? 0 : RowMajorBit),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
-          | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
+          | (((CanVectorizeLhs&&RhsIsDiagonal) || (CanVectorizeRhs&&LhsIsDiagonal)) ? PacketAccessBit : 0),

    CoeffReadCost = NumTraits<Scalar>::MulCost + _LhsNested::CoeffReadCost + _RhsNested::CoeffReadCost
  };
@@ -67,7 +86,7 @@ template<typename LhsNested, typename RhsNested> class Product<LhsNested, RhsNes
    typedef typename ei_traits<Product>::_RhsNested _RhsNested;

    enum {
-      RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal
+      RhsIsDiagonal = ei_is_diagonal<_RhsNested>::ret
    };

  public:
@@ -95,12 +114,10 @@ template<typename LhsNested, typename RhsNested> class Product<LhsNested, RhsNes
    {
      if (RhsIsDiagonal)
      {
-        ei_assert((_LhsNested::Flags&RowMajorBit)==0);
        return ei_pmul(m_lhs.template packet<LoadMode>(row, col), ei_pset1(m_rhs.coeff(col, col)));
      }
      else
      {
-        ei_assert(_RhsNested::Flags&RowMajorBit);
        return ei_pmul(ei_pset1(m_lhs.coeff(row, row)), m_rhs.template packet<LoadMode>(row, col));
      }
    }
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -153,9 +153,10 @@ struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
  typedef typename Derived1::Scalar Scalar;
  static Scalar run(const Derived1& v1, const Derived2& v2)
  {
+    ei_assert(v1.size()>0 && "you are using a non initialized vector");
    Scalar res;
    res = v1.coeff(0) * ei_conj(v2.coeff(0));
-    for(int i = 1; i < v1.size(); i++)
+    for(int i = 1; i < v1.size(); ++i)
      res += v1.coeff(i) * ei_conj(v2.coeff(i));
    return res;
  }
@@ -210,7 +211,7 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
    }

    // do the remainder of the vector
-    for(int index = alignedSize; index < size; index++)
+    for(int index = alignedSize; index < size; ++index)
    {
      res += v1.coeff(index) * v2.coeff(index);
    }
@@ -247,51 +248,60 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
  * \only_for_vectors
  *
  * \note If the scalar type is complex numbers, then this function returns the hermitian
-  * (sesquilinear) dot product, linear in the first variable and anti-linear in the
+  * (sesquilinear) dot product, linear in the first variable and conjugate-linear in the
  * second variable.
  *
-  * \sa norm2(), norm()
+  * \sa squaredNorm(), norm()
  */
 template<typename Derived>
 template<typename OtherDerived>
 typename ei_traits<Derived>::Scalar
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
-  typedef typename Derived::Nested Nested;
-  typedef typename OtherDerived::Nested OtherNested;
-  typedef typename ei_unref<Nested>::type _Nested;
-  typedef typename ei_unref<OtherNested>::type _OtherNested;
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
+  EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret),
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)

-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(_Nested);
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested);
-  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested);
  ei_assert(size() == other.size());

-  return ei_dot_impl<_Nested, _OtherNested>::run(derived(), other.derived());
+  // dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) )
+  typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
+  typedef typename ei_cleantype<typename OtherDerived::Nested>::type OtherNested;
+  return ei_dot_impl<ThisNested, OtherNested>::run(derived(), other.derived());
 }

-/** \returns the squared norm of *this, i.e. the dot product of *this with itself.
-  *
-  * \only_for_vectors
+/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
  *
  * \sa dot(), norm()
  */
 template<typename Derived>
-inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm2() const
+inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
 {
-  return ei_real(dot(*this));
+  return ei_real((*this).cwise().abs2().sum());
 }

-/** \returns the norm of *this, i.e. the square root of the dot product of *this with itself.
+/** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself.
  *
-  * \only_for_vectors
-  *
-  * \sa dot(), norm2()
+  * \sa dot(), squaredNorm()
  */
 template<typename Derived>
 inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
-  return ei_sqrt(norm2());
+  return ei_sqrt(squaredNorm());
+}
+
+/** \returns the \em l2 norm of \c *this using a numerically more stable
+  * algorithm.
+  *
+  * \sa norm(), dot(), squaredNorm()
+  */
+template<typename Derived>
+inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::stableNorm() const
+{
+  return this->cwise().abs().redux(ei_scalar_hypot_op<RealScalar>());
 }

 /** \returns an expression of the quotient of *this by its own norm.
@@ -301,7 +311,7 @@ inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<
  * \sa norm(), normalize()
  */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::EvalType
+inline const typename MatrixBase<Derived>::PlainMatrixType
 MatrixBase<Derived>::normalized() const
 {
  typedef typename ei_nested<Derived>::type Nested;
@@ -335,7 +345,7 @@ bool MatrixBase<Derived>::isOrthogonal
 {
  typename ei_nested<Derived,2>::type nested(derived());
  typename ei_nested<OtherDerived,2>::type otherNested(other.derived());
-  return ei_abs2(nested.dot(otherNested)) <= prec * prec * nested.norm2() * otherNested.norm2();
+  return ei_abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
 }

 /** \returns true if *this is approximately an unitary matrix,
@@ -353,11 +363,11 @@ template<typename Derived>
 bool MatrixBase<Derived>::isUnitary(RealScalar prec) const
 {
  typename Derived::Nested nested(derived());
-  for(int i = 0; i < cols(); i++)
+  for(int i = 0; i < cols(); ++i)
  {
-    if(!ei_isApprox(nested.col(i).norm2(), static_cast<Scalar>(1), prec))
+    if(!ei_isApprox(nested.col(i).squaredNorm(), static_cast<Scalar>(1), prec))
      return false;
-    for(int j = 0; j < i; j++)
+    for(int j = 0; j < i; ++j)
      if(!ei_isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
        return false;
  }
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -40,18 +40,9 @@
  * \sa MatrixBase::flagged()
  */
 template<typename ExpressionType, unsigned int Added, unsigned int Removed>
-struct ei_traits<Flagged<ExpressionType, Added, Removed> >
+struct ei_traits<Flagged<ExpressionType, Added, Removed> > : ei_traits<ExpressionType>
 {
-  typedef typename ExpressionType::Scalar Scalar;
-
-  enum {
-    RowsAtCompileTime = ExpressionType::RowsAtCompileTime,
-    ColsAtCompileTime = ExpressionType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = ExpressionType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = ExpressionType::MaxColsAtCompileTime,
-    Flags = (ExpressionType::Flags | Added) & ~Removed,
-    CoeffReadCost = ExpressionType::CoeffReadCost
-  };
+  enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
 };

 template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -33,10 +33,13 @@
  * \sa class CwiseBinaryOp, MatrixBase::operator+, class PartialRedux, MatrixBase::sum()
  */
 template<typename Scalar> struct ei_scalar_sum_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
  { return ei_padd(a,b); }
+  template<typename PacketScalar>
+  EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+  { return ei_predux(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
@@ -52,10 +55,13 @@ struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
  * \sa class CwiseBinaryOp, Cwise::operator*(), class PartialRedux, MatrixBase::redux()
  */
 template<typename Scalar> struct ei_scalar_product_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a * b; }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a * b; }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
  { return ei_pmul(a,b); }
+  template<typename PacketScalar>
+  EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+  { return ei_predux_mul(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
@@ -71,10 +77,13 @@ struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
  * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class PartialRedux, MatrixBase::minCoeff()
  */
 template<typename Scalar> struct ei_scalar_min_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::min(a, b); }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
  { return ei_pmin(a,b); }
+  template<typename PacketScalar>
+  EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+  { return ei_predux_min(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
@@ -90,10 +99,13 @@ struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
  * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class PartialRedux, MatrixBase::maxCoeff()
  */
 template<typename Scalar> struct ei_scalar_max_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return std::max(a, b); }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
  { return ei_pmax(a,b); }
+  template<typename PacketScalar>
+  EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+  { return ei_predux_max(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
@@ -103,6 +115,28 @@ struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
  };
 };

+/** \internal
+  * \brief Template functor to compute the hypot of two scalars
+  *
+  * \sa MatrixBase::stableNorm(), class Redux
+  */
+template<typename Scalar> struct ei_scalar_hypot_op EIGEN_EMPTY_STRUCT {
+  // Computes sqrt(_x*_x + _y*_y) as p*sqrt(1 + (q/p)^2), p = max and q = min of the inputs,
+  // which avoids squaring the larger value. Assumes non-negative inputs (stableNorm() passes abs values).
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+  {
+    Scalar p = std::max(_x, _y);
+    Scalar q = std::min(_x, _y);
+    // p == 0 means both (non-negative) inputs are zero: return 0 rather than 0/0 = NaN.
+    if (p == Scalar(0)) return p;
+    Scalar qp = q/p;
+    return p * ei_sqrt(Scalar(1) + qp*qp);
+  }
+};
+template<typename Scalar>
+struct ei_functor_traits<ei_scalar_hypot_op<Scalar> > {
+  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
+};

 // other binary functors:

@@ -112,9 +146,9 @@ struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
  * \sa class CwiseBinaryOp, MatrixBase::operator-
  */
 template<typename Scalar> struct ei_scalar_difference_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
  { return ei_psub(a,b); }
 };
 template<typename Scalar>
@@ -131,9 +165,9 @@ struct ei_functor_traits<ei_scalar_difference_op<Scalar> > {
  * \sa class CwiseBinaryOp, Cwise::operator/()
  */
 template<typename Scalar> struct ei_scalar_quotient_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
  { return ei_pdiv(a,b); }
 };
 template<typename Scalar>
@@ -155,11 +189,17 @@ struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
  * \sa class CwiseUnaryOp, MatrixBase::operator-
  */
 template<typename Scalar> struct ei_scalar_opposite_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a) const { return -a; }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+  template<typename PacketScalar>
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
+  { return ei_pnegate(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
+{ enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
+};

 /** \internal
  * \brief Template functor to compute the absolute value of a scalar
@@ -168,14 +208,17 @@ struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
  */
 template<typename Scalar> struct ei_scalar_abs_op EIGEN_EMPTY_STRUCT {
  typedef typename NumTraits<Scalar>::Real result_type;
-  inline const result_type operator() (const Scalar& a) const { return ei_abs(a); }
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs(a); }
+  template<typename PacketScalar>
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
+  { return ei_pabs(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
 {
  enum {
    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = false // this could actually be vectorized with SSSE3.
+    PacketAccess = int(ei_packet_traits<Scalar>::size)>1
  };
 };

@@ -186,9 +229,9 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
  */
 template<typename Scalar> struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT {
  typedef typename NumTraits<Scalar>::Real result_type;
-  inline const result_type operator() (const Scalar& a) const { return ei_abs2(a); }
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs2(a); }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a) const
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
  { return ei_pmul(a,a); }
 };
 template<typename Scalar>
@@ -201,9 +244,9 @@ struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
  * \sa class CwiseUnaryOp, MatrixBase::conjugate()
  */
 template<typename Scalar> struct ei_scalar_conjugate_op EIGEN_EMPTY_STRUCT {
-  inline const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); }
  template<typename PacketScalar>
-  inline const PacketScalar packetOp(const PacketScalar& a) const { return a; }
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
@@ -222,7 +265,7 @@ struct ei_functor_traits<ei_scalar_conjugate_op<Scalar> >
 template<typename Scalar, typename NewType>
 struct ei_scalar_cast_op EIGEN_EMPTY_STRUCT {
  typedef NewType result_type;
-  inline const NewType operator() (const Scalar& a) const { return static_cast<NewType>(a); }
+  EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return static_cast<NewType>(a); }
 };
 template<typename Scalar, typename NewType>
 struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> >
@@ -236,11 +279,25 @@ struct ei_functor_traits<ei_scalar_cast_op<Scalar,NewType> >
 template<typename Scalar>
 struct ei_scalar_real_op EIGEN_EMPTY_STRUCT {
  typedef typename NumTraits<Scalar>::Real result_type;
-  inline result_type operator() (const Scalar& a) const { return ei_real(a); }
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return ei_real(a); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_real_op<Scalar> >
-{ enum { Cost =  0, PacketAccess = false }; };
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the imaginary part of a complex
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::imag()
+  */
+template<typename Scalar>
+struct ei_scalar_imag_op EIGEN_EMPTY_STRUCT {
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return ei_imag(a); }
+};
+template<typename Scalar>
+struct ei_functor_traits<ei_scalar_imag_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };

 /** \internal
  * \brief Template functor to multiply a scalar by a fixed other one
@@ -251,7 +308,7 @@ struct ei_functor_traits<ei_scalar_real_op<Scalar> >
 * indeed it seems better to declare m_other as a PacketScalar and do the ei_pset1() once
 * in the constructor. However, in practice:
 *  - GCC does not like m_other as a PacketScalar and generate a load every time it needs it
- *  - one the other hand GCC is able to moves the ei_pset1() away the loop :)
+ *  - on the other hand GCC is able to move the ei_pset1() out of the loop :)
 *  - simpler code ;)
 * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
 */
@@ -259,10 +316,10 @@ template<typename Scalar>
 struct ei_scalar_multiple_op {
  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
  // FIXME default copy constructors seems bugged with std::complex<>
-  inline ei_scalar_multiple_op(const ei_scalar_multiple_op& other) : m_other(other.m_other) { }
-  inline ei_scalar_multiple_op(const Scalar& other) : m_other(other) { }
-  inline Scalar operator() (const Scalar& a) const { return a * m_other; }
-  inline const PacketScalar packetOp(const PacketScalar& a) const
+  EIGEN_STRONG_INLINE ei_scalar_multiple_op(const ei_scalar_multiple_op& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE ei_scalar_multiple_op(const Scalar& other) : m_other(other) { }
+  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
  { return ei_pmul(a, ei_pset1(m_other)); }
  const Scalar m_other;
 };
@@ -270,14 +327,26 @@ template<typename Scalar>
 struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
 { enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };

+template<typename Scalar1, typename Scalar2>
+struct ei_scalar_multiple2_op {
+  typedef typename ei_scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
+  EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
+  const Scalar2 m_other;
+};
+template<typename Scalar1,typename Scalar2>
+struct ei_functor_traits<ei_scalar_multiple2_op<Scalar1,Scalar2> >
+{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
+
 template<typename Scalar, bool HasFloatingPoint>
 struct ei_scalar_quotient1_impl {
  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
  // FIXME default copy constructors seems bugged with std::complex<>
-  inline ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { }
-  inline ei_scalar_quotient1_impl(const Scalar& other) : m_other(static_cast<Scalar>(1) / other) {}
-  inline Scalar operator() (const Scalar& a) const { return a * m_other; }
-  inline const PacketScalar packetOp(const PacketScalar& a) const
+  EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(static_cast<Scalar>(1) / other) {}
+  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
+  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
  { return ei_pmul(a, ei_pset1(m_other)); }
  const Scalar m_other;
 };
@@ -288,9 +357,9 @@ struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,true> >
 template<typename Scalar>
 struct ei_scalar_quotient1_impl<Scalar,false> {
  // FIXME default copy constructors seems bugged with std::complex<>
-  inline ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { }
-  inline ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {}
-  inline Scalar operator() (const Scalar& a) const { return a / m_other; }
+  EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {}
+  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
  const Scalar m_other;
 };
 template<typename Scalar>
@@ -307,19 +376,23 @@ struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
  */
 template<typename Scalar>
 struct ei_scalar_quotient1_op : ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint > {
-  inline ei_scalar_quotient1_op(const Scalar& other)
+  EIGEN_STRONG_INLINE ei_scalar_quotient1_op(const Scalar& other)
    : ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint >(other) {}
 };
+template<typename Scalar>
+struct ei_functor_traits<ei_scalar_quotient1_op<Scalar> > 
+: ei_functor_traits<ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint> >
+{};

 // nullary functors

 template<typename Scalar>
 struct ei_scalar_constant_op {
  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-  inline ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { }
-  inline ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
-  inline const Scalar operator() (int, int = 0) const { return m_other; }
-  inline const PacketScalar packetOp() const { return ei_pset1(m_other); }
+  EIGEN_STRONG_INLINE ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { }
+  EIGEN_STRONG_INLINE ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
+  EIGEN_STRONG_INLINE const Scalar operator() (int, int = 0) const { return m_other; }
+  EIGEN_STRONG_INLINE const PacketScalar packetOp() const { return ei_pset1(m_other); }
  const Scalar m_other;
 };
 template<typename Scalar>
@@ -327,18 +400,28 @@ struct ei_functor_traits<ei_scalar_constant_op<Scalar> >
 { enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };

 template<typename Scalar> struct ei_scalar_identity_op EIGEN_EMPTY_STRUCT {
-  inline ei_scalar_identity_op(void) {}
-  inline const Scalar operator() (int row, int col) const { return row==col ? Scalar(1) : Scalar(0); }
+  EIGEN_STRONG_INLINE ei_scalar_identity_op(void) {}
+  EIGEN_STRONG_INLINE const Scalar operator() (int row, int col) const { return row==col ? Scalar(1) : Scalar(0); }
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_identity_op<Scalar> >
 { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };

-// NOTE quick hack:
+// allows new functors and specializations of ei_functor_traits to be added from outside Eigen.
+// this macro is really needed because ei_functor_traits must be specialized after it is declared but before it is used...
+#ifdef EIGEN_FUNCTORS_PLUGIN
+#include EIGEN_FUNCTORS_PLUGIN
+#endif
+
 // all functors allow linear access, except ei_scalar_identity_op. So we fix here a quick meta
 // to indicate whether a functor allows linear access, just always answering 'yes' except for
 // ei_scalar_identity_op.
 template<typename Functor> struct ei_functor_has_linear_access { enum { ret = 1 }; };
 template<typename Scalar> struct ei_functor_has_linear_access<ei_scalar_identity_op<Scalar> > { enum { ret = 0 }; };

+// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
+// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
+template<typename Functor> struct ei_functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
+template<typename Scalar> struct ei_functor_allows_mixing_real_and_complex<ei_scalar_product_op<Scalar> > { enum { ret = 1 }; };
+
 #endif // EIGEN_FUNCTORS_H
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -176,19 +176,19 @@ struct ei_fuzzy_selector<Derived,OtherDerived,true>
  typedef typename Derived::RealScalar RealScalar;
  static bool isApprox(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
-    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived);
+    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
    ei_assert(self.size() == other.size());
-    return((self - other).norm2() <= std::min(self.norm2(), other.norm2()) * prec * prec);
+    return((self - other).squaredNorm() <= std::min(self.squaredNorm(), other.squaredNorm()) * prec * prec);
  }
  static bool isMuchSmallerThan(const Derived& self, const RealScalar& other, RealScalar prec)
  {
-    return(self.norm2() <= ei_abs2(other * prec));
+    return(self.squaredNorm() <= ei_abs2(other * prec));
  }
  static bool isMuchSmallerThan(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
-    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived);
+    EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
    ei_assert(self.size() == other.size());
-    return(self.norm2() <= other.norm2() * prec * prec);
+    return(self.squaredNorm() <= other.squaredNorm() * prec * prec);
  }
 };

@@ -198,32 +198,32 @@ struct ei_fuzzy_selector<Derived,OtherDerived,false>
  typedef typename Derived::RealScalar RealScalar;
  static bool isApprox(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
-    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived);
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
    ei_assert(self.rows() == other.rows() && self.cols() == other.cols());
    typename Derived::Nested nested(self);
    typename OtherDerived::Nested otherNested(other);
-    for(int i = 0; i < self.cols(); i++)
-      if((nested.col(i) - otherNested.col(i)).norm2()
-          > std::min(nested.col(i).norm2(), otherNested.col(i).norm2()) * prec * prec)
+    for(int i = 0; i < self.cols(); ++i)
+      if((nested.col(i) - otherNested.col(i)).squaredNorm()
+          > std::min(nested.col(i).squaredNorm(), otherNested.col(i).squaredNorm()) * prec * prec)
        return false;
    return true;
  }
  static bool isMuchSmallerThan(const Derived& self, const RealScalar& other, RealScalar prec)
  {
    typename Derived::Nested nested(self);
-    for(int i = 0; i < self.cols(); i++)
-      if(nested.col(i).norm2() > ei_abs2(other * prec))
+    for(int i = 0; i < self.cols(); ++i)
+      if(nested.col(i).squaredNorm() > ei_abs2(other * prec))
        return false;
    return true;
  }
  static bool isMuchSmallerThan(const Derived& self, const OtherDerived& other, RealScalar prec)
  {
-    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived);
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
    ei_assert(self.rows() == other.rows() && self.cols() == other.cols());
    typename Derived::Nested nested(self);
    typename OtherDerived::Nested otherNested(other);
-    for(int i = 0; i < self.cols(); i++)
-      if(nested.col(i).norm2() > otherNested.col(i).norm2() * prec * prec)
+    for(int i = 0; i < self.cols(); ++i)
+      if(nested.col(i).squaredNorm() > otherNested.col(i).squaredNorm() * prec * prec)
        return false;
    return true;
  }
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -34,6 +34,47 @@
  * of generic vectorized code.
  */

+struct ei_default_packet_traits // baseline Has* flags; ei_packet_traits<T> derives from this struct
+{
+  enum {
+    HasAdd    = 1, // this first group of flags defaults to 1
+    HasSub    = 1,
+    HasMul    = 1,
+    HasNegate = 1,
+    HasAbs    = 1,
+    HasMin    = 1,
+    HasMax    = 1,
+    
+    HasDiv    = 0, // every flag from here on defaults to 0 (presumably "no packet implementation" -- TODO confirm)
+    HasSqrt   = 0,
+    HasExp    = 0,
+    HasLog    = 0,
+    HasPow    = 0,
+    
+    HasSin    = 0,
+    HasCos    = 0,
+    HasTan    = 0,
+    HasASin   = 0,
+    HasACos   = 0,
+    HasATan   = 0
+  };
+};
+
+template<typename T> struct ei_packet_traits : ei_default_packet_traits // generic (unspecialized) traits
+{
+  typedef T type; // the "packet" type of the generic case is the scalar type itself
+  enum {size=1};  // ... and it holds a single coefficient
+  enum {          // these enumerators hide the inherited defaults of 1; the other Has* flags stay 0 from the base
+    HasAdd    = 0,
+    HasSub    = 0,
+    HasMul    = 0,
+    HasNegate = 0,
+    HasAbs    = 0,
+    HasMin    = 0,
+    HasMax    = 0
+  };
+};
+
 /** \internal \returns a + b (coeff-wise) */
 template<typename Packet> inline Packet
 ei_padd(const Packet& a,
@@ -44,6 +85,10 @@ template<typename Packet> inline Packet
 ei_psub(const Packet& a,
        const Packet& b) { return a-b; }

+/** \internal \returns -a (coeff-wise) */
+template<typename Packet> inline Packet
+ei_pnegate(const Packet& a) { return -a; }
+
 /** \internal \returns a * b (coeff-wise) */
 template<typename Packet> inline Packet
 ei_pmul(const Packet& a,
@@ -64,6 +109,26 @@ template<typename Packet> inline Packet
 ei_pmax(const Packet& a,
        const Packet& b) { return std::max(a, b); }

+/** \internal \returns the absolute value of \a a */
+template<typename Packet> inline Packet
+ei_pabs(const Packet& a) { return ei_abs(a); }
+
+/** \internal \returns the bitwise and of \a a and \a b */
+template<typename Packet> inline Packet
+ei_pand(const Packet& a, const Packet& b) { return a & b; }
+
+/** \internal \returns the bitwise or of \a a and \a b */
+template<typename Packet> inline Packet
+ei_por(const Packet& a, const Packet& b) { return a | b; }
+
+/** \internal \returns the bitwise xor of \a a and \a b */
+template<typename Packet> inline Packet
+ei_pxor(const Packet& a, const Packet& b) { return a ^ b; }
+
+/** \internal \returns the bitwise andnot of \a a and \a b, i.e. \a a & ~\a b (bitwise complement, not logical negation) */
+template<typename Packet> inline Packet
+ei_pandnot(const Packet& a, const Packet& b) { return a & (~b); }
+        
 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
 template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
 ei_pload(const Scalar* from) { return *from; }
@@ -96,6 +161,40 @@ ei_preduxp(const Packet* vecs) { return vecs[0]; }
 template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet& a)
 { return a; }

+/** \internal \returns the product of the elements of \a a*/
+template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a)
+{ return a; }
+
+/** \internal \returns the min of the elements of \a a*/
+template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_min(const Packet& a)
+{ return a; }
+
+/** \internal \returns the max of the elements of \a a*/
+template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_max(const Packet& a)
+{ return a; }
+
+/** \internal \returns the reversed elements of \a a*/
+template<typename Packet> inline Packet ei_preverse(const Packet& a)
+{ return a; }
+
+/**************************
+* Special math functions
+***************************/
+
+/** \internal \returns the sin of \a a (coeff-wise) */
+template<typename Packet> inline static Packet ei_psin(Packet a) { return ei_sin(a); }
+
+/** \internal \returns the cos of \a a (coeff-wise) */
+template<typename Packet> inline static Packet ei_pcos(Packet a) { return ei_cos(a); }
+
+/** \internal \returns the exp of \a a (coeff-wise) */
+template<typename Packet> inline static Packet ei_pexp(Packet a) { return ei_exp(a); }
+
+/** \internal \returns the log of \a a (coeff-wise) */
+template<typename Packet> inline static Packet ei_plog(Packet a) { return ei_log(a); }
+
+/** \internal \returns the square-root of \a a (coeff-wise) */
+template<typename Packet> inline static Packet ei_psqrt(Packet a) { return ei_sqrt(a); }

 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -1,16 +1,17 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either 
+// License as published by the Free Software Foundation; either
 // version 3 of the License, or (at your option) any later version.
 //
 // Alternatively, you can redistribute it and/or
 // modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of 
+// published by the Free Software Foundation; either version 2 of
 // the License, or (at your option) any later version.
 //
 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
@@ -18,7 +19,7 @@
 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
 // GNU General Public License for more details.
 //
-// You should have received a copy of the GNU Lesser General Public 
+// You should have received a copy of the GNU Lesser General Public
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

@@ -57,7 +58,7 @@ struct IOFormat
    coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
  {
    rowSpacer = "";
-    int i=matSuffix.length()-1;
+    int i = int(matSuffix.length())-1;
    while (i>=0 && matSuffix[i]!='\n')
    {
      rowSpacer += ' ';
@@ -80,7 +81,7 @@ struct IOFormat
  * This class represents an expression with stream operators controlled by a given IOFormat.
  * It is the return type of MatrixBase::format()
  * and most of the time this is the only way it is used.
-  * 
+  *
  * See class IOFormat for some examples.
  *
  * \sa MatrixBase::format(), class IOFormat
@@ -121,33 +122,33 @@ MatrixBase<Derived>::format(const IOFormat& fmt) const
 /** \internal
  * print the matrix \a _m to the output stream \a s using the output format \a fmt */
 template<typename Derived>
-std::ostream & ei_print_matrix(std::ostream & s, const MatrixBase<Derived> & _m,
-                               const IOFormat& fmt = IOFormat())
+std::ostream & ei_print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
 {
  const typename Derived::Nested m = _m;
+
  int width = 0;
  if (fmt.flags & AlignCols)
  {
    // compute the largest width
-    for(int j = 1; j < m.cols(); j++)
-      for(int i = 0; i < m.rows(); i++)
+    for(int j = 0; j < m.cols(); ++j) // start at column 0: it is printed with s.width(width) too, so it must be measured
+      for(int i = 0; i < m.rows(); ++i)
      {
        std::stringstream sstr;
        sstr.precision(fmt.precision);
        sstr << m.coeff(i,j);
-        width = std::max<int>(width, sstr.str().length());
+        width = std::max<int>(width, int(sstr.str().length()));
      }
  }
  s.precision(fmt.precision);
  s << fmt.matPrefix;
-  for(int i = 0; i < m.rows(); i++)
+  for(int i = 0; i < m.rows(); ++i)
  {
    if (i)
      s << fmt.rowSpacer;
    s << fmt.rowPrefix;
    if(width) s.width(width);
    s << m.coeff(i, 0);
-    for(int j = 1; j < m.cols(); j++)
+    for(int j = 1; j < m.cols(); ++j)
    {
      s << fmt.coeffSeparator;
      if (width) s.width(width);
@@ -163,8 +164,12 @@ std::ostream & ei_print_matrix(std::ostream & s, const MatrixBase<Derived> & _m,

 /** \relates MatrixBase
  *
-  * Outputs the matrix, laid out as an array as usual, to the given stream.
-  * You can control the way the matrix is printed using MatrixBase::format().
+  * Outputs the matrix to the given stream.
+  *
+  * If you wish to print the matrix with a format different than the default, use MatrixBase::format().
+  *
+  * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers.
+  * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default parameters.
  *
  * \sa MatrixBase::format()
  */
@@ -173,7 +178,7 @@ std::ostream & operator <<
 (std::ostream & s,
 const MatrixBase<Derived> & m)
 {
-  return ei_print_matrix(s, m.eval());
+  return ei_print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
 }

 #endif // EIGEN_IO_H
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -40,9 +40,15 @@
  * It can be used to let Eigen interface without any overhead with non-Eigen data structures,
  * such as plain C arrays or structures from other libraries.
  *
-  * This class is the return type of Matrix::map() but can also be used directly.
+  * \b Tips: to change the array of data mapped by a Map object, you can use the C++
+  * placement new syntax:
  *
-  * \sa Matrix::map()
+  * Example: \include Map_placement_new.cpp
+  * Output: \verbinclude Map_placement_new.out
+  *
+  * This class is the return type of Matrix::Map() but can also be used directly.
+  *
+  * \sa Matrix::Map()
  */
 template<typename MatrixType, int _PacketAccess>
 struct ei_traits<Map<MatrixType, _PacketAccess> > : public ei_traits<MatrixType>
@@ -66,12 +72,9 @@ template<typename MatrixType, int PacketAccess> class Map

    inline int stride() const { return this->innerSize(); }

-    AlignedDerivedType forceAligned()
+    AlignedDerivedType _convertToForceAligned()
    {
-      if (PacketAccess==ForceAligned)
-        return *this;
-      else
-        return Map<MatrixType,ForceAligned>(Base::m_data, Base::m_rows.value(), Base::m_cols.value());
+      return Map<MatrixType,ForceAligned>(Base::m_data, Base::m_rows.value(), Base::m_cols.value());
    }

    inline Map(const Scalar* data) : Base(data) {}
@@ -85,12 +88,12 @@ template<typename MatrixType, int PacketAccess> class Map
      EIGEN_ONLY_USED_FOR_DEBUG(rows);
      EIGEN_ONLY_USED_FOR_DEBUG(cols);
      ei_assert(rows == this->rows());
-      ei_assert(rows == this->cols());
+      ei_assert(cols == this->cols());
    }

    inline void resize(int size)
    {
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(MatrixType);
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(MatrixType)
      EIGEN_ONLY_USED_FOR_DEBUG(size);
      ei_assert(size == this->size());
    }
@@ -102,17 +105,13 @@ template<typename MatrixType, int PacketAccess> class Map
  * Only for fixed-size matrices and vectors.
  * \param data The array of data to copy
  *
-  * For dynamic-size matrices and vectors, see the variants taking additional int parameters
-  * for the dimensions.
-  *
-  * \sa Matrix(const Scalar *, int), Matrix(const Scalar *, int, int),
-  * Matrix::map(const Scalar *)
+  * \sa Matrix::Map(const Scalar *)
  */
 template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
 inline Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols>
  ::Matrix(const Scalar *data)
 {
-  *this = Map<Matrix>(data);
+  _set_noalias(Eigen::Map<Matrix>(data));
 }

 #endif // EIGEN_MAP_H
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -53,7 +53,7 @@ template<typename Derived> class MapBase
      ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime,
      SizeAtCompileTime = Base::SizeAtCompileTime
    };
-    
+
    typedef typename ei_traits<Derived>::AlignedDerivedType AlignedDerivedType;
    typedef typename ei_traits<Derived>::Scalar Scalar;
    typedef typename Base::PacketScalar PacketScalar;
@@ -63,10 +63,22 @@ template<typename Derived> class MapBase
    inline int cols() const { return m_cols.value(); }

    inline int stride() const { return derived().stride(); }
+    inline const Scalar* data() const { return m_data; }
+
+    template<bool IsForceAligned,typename Dummy> struct force_aligned_impl {
+      AlignedDerivedType static run(MapBase& a) { return a.derived(); }
+    };
+
+    template<typename Dummy> struct force_aligned_impl<false,Dummy> {
+      AlignedDerivedType static run(MapBase& a) { return a.derived()._convertToForceAligned(); }
+    };

    /** \returns an expression equivalent to \c *this but having the \c PacketAccess constant
      * set to \c ForceAligned. Must be reimplemented by the derived class. */
-    AlignedDerivedType forceAligned() { return derived().forceAligned(); }
+    AlignedDerivedType forceAligned()
+    {
+      return force_aligned_impl<int(PacketAccess)==int(ForceAligned),Derived>::run(*this);
+    }

    inline const Scalar& coeff(int row, int col) const
    {
@@ -83,8 +95,8 @@ template<typename Derived> class MapBase
      else // column-major
        return const_cast<Scalar*>(m_data)[row + col * stride()];
    }
-    
-    inline const Scalar coeff(int index) const
+
+    inline const Scalar& coeff(int index) const
    {
      ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
      if ( ((RowsAtCompileTime == 1) == IsRowMajor) )
@@ -95,7 +107,11 @@ template<typename Derived> class MapBase

    inline Scalar& coeffRef(int index)
    {
-      return *const_cast<Scalar*>(m_data + index);
+      ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
+      if ( ((RowsAtCompileTime == 1) == IsRowMajor) )
+        return const_cast<Scalar*>(m_data)[index];
+      else
+        return const_cast<Scalar*>(m_data)[index*stride()];
    }

    template<int LoadMode>
@@ -138,28 +154,42 @@ template<typename Derived> class MapBase
              m_cols(ColsAtCompileTime == Dynamic ? size : ColsAtCompileTime)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-      ei_assert(size > 0);
+      ei_assert(size > 0 || data == 0);
      ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
    }

    inline MapBase(const Scalar* data, int rows, int cols)
            : m_data(data), m_rows(rows), m_cols(cols)
    {
-      ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
-             && cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+      ei_assert( (data == 0)
+              || (   rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+                  && cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
    }

+    Derived& operator=(const MapBase& other)
+    {
+      return Base::operator=(other);
+    }
+
+    template<typename OtherDerived>
+    Derived& operator=(const MatrixBase<OtherDerived>& other)
+    {
+      return Base::operator=(other);
+    }
+
+    using Base::operator*=;
+
    template<typename OtherDerived>
    Derived& operator+=(const MatrixBase<OtherDerived>& other)
    { return derived() = forceAligned() + other; }
-    
+
    template<typename OtherDerived>
    Derived& operator-=(const MatrixBase<OtherDerived>& other)
    { return derived() = forceAligned() - other; }

    Derived& operator*=(const Scalar& other)
    { return derived() = forceAligned() * other; }
-    
+
    Derived& operator/=(const Scalar& other)
    { return derived() = forceAligned() / other; }

--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,6 +26,7 @@
 #define EIGEN_MATHFUNCTIONS_H

 template<typename T> inline typename NumTraits<T>::Real precision();
+template<typename T> inline typename NumTraits<T>::Real machine_epsilon();
 template<typename T> inline T ei_random(T a, T b);
 template<typename T> inline T ei_random();
 template<typename T> inline T ei_random_amplitude()
@@ -34,11 +35,23 @@ template<typename T> inline T ei_random_amplitude()
  else return static_cast<T>(10);
 }

+template<typename T> inline typename NumTraits<T>::Real ei_hypot(T x, T y) // sqrt(|x|^2 + |y|^2), evaluated as p*sqrt(1+(q/p)^2)
+{
+  typedef typename NumTraits<T>::Real RealScalar;
+  RealScalar _x = ei_abs(x);
+  RealScalar _y = ei_abs(y);
+  RealScalar p = std::max(_x, _y);                 // larger magnitude; intermediates are real so the Real return type is honoured
+  if(p == RealScalar(0)) return RealScalar(0);     // both arguments are zero: q/p would be 0/0 (NaN)
+  RealScalar qp = std::min(_x, _y) / p;            // ratio smaller/larger, at most 1
+  return p * ei_sqrt(RealScalar(1) + qp*qp);
+}
+
 /**************
 ***   int   ***
 **************/

 template<> inline int precision<int>() { return 0; }
+template<> inline int machine_epsilon<int>() { return 0; }
 inline int ei_real(int x)  { return x; }
 inline int ei_imag(int)    { return 0; }
 inline int ei_conj(int x)  { return x; }
@@ -49,12 +62,20 @@ inline int ei_exp(int)  { ei_assert(false); return 0; }
 inline int ei_log(int)  { ei_assert(false); return 0; }
 inline int ei_sin(int)  { ei_assert(false); return 0; }
 inline int ei_cos(int)  { ei_assert(false); return 0; }
-
-#if EIGEN_GNUC_AT_LEAST(4,3)
-inline int ei_pow(int x, int y) { return std::pow(x, y); }
-#else
-inline int ei_pow(int x, int y) { return int(std::pow(double(x), y)); }
-#endif
+inline int ei_pow(int x, int y) // integer power by repeated squaring; negative exponents give the truncated value of x^y
+{
+  int res = 1;
+  if(y < 0) return x == 1 ? 1 : (x == -1 ? ((y & 1) ? -1 : 1) : 0); // only |x|==1 has a non-zero truncated inverse power
+  if(y & 1) res *= x; // lowest exponent bit uses x itself
+  y >>= 1;
+  while(y)
+  {
+    x *= x;           // x now holds the original x raised to the next power of two
+    if(y&1) res *= x; // multiply it in when the corresponding exponent bit is set
+    y >>= 1;
+  }
+  return res;
+}

 template<> inline int ei_random(int a, int b)
 {
@@ -83,6 +104,7 @@ inline bool ei_isApproxOrLessThan(int a, int b, int = precision<int>())
 **************/

 template<> inline float precision<float>() { return 1e-5f; }
+template<> inline float machine_epsilon<float>() { return 1.192e-07f; }
 inline float ei_real(float x)  { return x; }
 inline float ei_imag(float)    { return 0.f; }
 inline float ei_conj(float x)  { return x; }
@@ -101,9 +123,9 @@ template<> inline float ei_random(float a, float b)
  int i;
  do { i = ei_random<int>(256*int(a),256*int(b));
  } while(i==0);
-  return i/256.f;
+  return float(i)/256.f;
 #else
-  return a + (b-a) * std::rand() / RAND_MAX;
+  return a + (b-a) * float(std::rand()) / float(RAND_MAX);
 #endif
 }
 template<> inline float ei_random()
@@ -128,6 +150,8 @@ inline bool ei_isApproxOrLessThan(float a, float b, float prec = precision<float
 **************/

 template<> inline double precision<double>() { return 1e-11; }
+template<> inline double machine_epsilon<double>() { return 2.220e-16; }
+
 inline double ei_real(double x)  { return x; }
 inline double ei_imag(double)    { return 0.; }
 inline double ei_conj(double x)  { return x; }
@@ -138,7 +162,7 @@ inline double ei_exp(double x)   { return std::exp(x); }
 inline double ei_log(double x)   { return std::log(x); }
 inline double ei_sin(double x)   { return std::sin(x); }
 inline double ei_cos(double x)   { return std::cos(x); }
-inline double ei_pow(double x, double y)  { return std::pow(x, y); }
+inline double ei_pow(double x, double y) { return std::pow(x, y); }

 template<> inline double ei_random(double a, double b)
 {
@@ -173,6 +197,7 @@ inline bool ei_isApproxOrLessThan(double a, double b, double prec = precision<do
 *********************/

 template<> inline float precision<std::complex<float> >() { return precision<float>(); }
+template<> inline float machine_epsilon<std::complex<float> >() { return machine_epsilon<float>(); }
 inline float ei_real(const std::complex<float>& x) { return std::real(x); }
 inline float ei_imag(const std::complex<float>& x) { return std::imag(x); }
 inline std::complex<float> ei_conj(const std::complex<float>& x) { return std::conj(x); }
@@ -206,6 +231,7 @@ inline bool ei_isApprox(const std::complex<float>& a, const std::complex<float>&
 **********************/

 template<> inline double precision<std::complex<double> >() { return precision<double>(); }
+template<> inline double machine_epsilon<std::complex<double> >() { return machine_epsilon<double>(); }
 inline double ei_real(const std::complex<double>& x) { return std::real(x); }
 inline double ei_imag(const std::complex<double>& x) { return std::imag(x); }
 inline std::complex<double> ei_conj(const std::complex<double>& x) { return std::conj(x); }
@@ -240,6 +266,7 @@ inline bool ei_isApprox(const std::complex<double>& a, const std::complex<double
 ******************/

 template<> inline long double precision<long double>() { return precision<double>(); }
+template<> inline long double machine_epsilon<long double>() { return 1.084e-19l; }
 inline long double ei_real(long double x)  { return x; }
 inline long double ei_imag(long double)    { return 0.; }
 inline long double ei_conj(long double x)  { return x; }
@@ -254,7 +281,7 @@ inline long double ei_pow(long double x, long double y)  { return std::pow(x, y)

 template<> inline long double ei_random(long double a, long double b)
 {
-  return ei_random<double>(a,b);
+  return ei_random<double>(static_cast<double>(a),static_cast<double>(b));
 }
 template<> inline long double ei_random()
 {
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -30,58 +30,82 @@
  *
  * \brief The matrix class, also used for vectors and row-vectors
  *
-  * \param _Scalar the scalar type, i.e. the type of the coefficients
-  * \param _Rows the number of rows at compile-time. Use the special value \a Dynamic to
-  *              specify that the number of rows is dynamic, i.e. is not fixed at compile-time.
-  * \param _Cols the number of columns at compile-time. Use the special value \a Dynamic to
-  *              specify that the number of columns is dynamic, i.e. is not fixed at compile-time.
-  * \param _StorageOrder can be either RowMajor or ColMajor. The default is ColMajor.
-  * \param _MaxRows the maximum number of rows at compile-time. By default this is equal to \a _Rows.
-  *              The most common exception is when you don't know the exact number of rows, but know that
-  *              it is smaller than some given value. Then you can set \a _MaxRows to that value, and set
-  *              _Rows to \a Dynamic.
-  * \param _MaxCols the maximum number of cols at compile-time. By default this is equal to \a _Cols.
-  *              The most common exception is when you don't know the exact number of cols, but know that
-  *              it is smaller than some given value. Then you can set \a _MaxCols to that value, and set
-  *              _Cols to \a Dynamic.
+  * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen.
+  * Vectors are matrices with one column, and row-vectors are matrices with one row.
  *
-  * This single class template covers all kinds of matrix and vectors that Eigen can handle.
-  * All matrix and vector types are just typedefs to specializations of this class template.
+  * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
  *
-  * These typedefs are as follows:
-  * \li \c %Matrix\#\#Size\#\#Type for square matrices
-  * \li \c Vector\#\#Size\#\#Type for vectors (matrices with one column)
-  * \li \c RowVector\#\#Size\#\#Type for row-vectors (matrices with one row)
+  * The first three template parameters are required:
+  * \param _Scalar Numeric type, i.e. float, double, int
+  * \param _Rows Number of rows, or \b Dynamic
+  * \param _Cols Number of columns, or \b Dynamic
  *
-  * where \c Size can be
-  * \li \c 2 for fixed size 2
-  * \li \c 3 for fixed size 3
-  * \li \c 4 for fixed size 4
-  * \li \c X for dynamic size
+  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
+  * \param _Options A combination of either \b RowMajor or \b ColMajor, and of either
+  *                 \b AutoAlign or \b DontAlign.
+  *                 The former controls storage order, and defaults to column-major. The latter controls alignment, which is required
+  *                 for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
+  * \param _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
+  * \param _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note").
  *
-  * and \c Type can be
-  * \li \c i for type \c int
-  * \li \c f for type \c float
-  * \li \c d for type \c double
-  * \li \c cf for type \c std::complex<float>
-  * \li \c cd for type \c std::complex<double>
+  * Eigen provides a number of typedefs covering the usual cases. Here are some examples:
  *
-  * Examples:
-  * \li \c Matrix2d is a typedef for \c Matrix<double,2,2>
-  * \li \c VectorXf is a typedef for \c Matrix<float,Dynamic,1>
-  * \li \c RowVector3i is a typedef for \c Matrix<int,1,3>
+  * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix<double, 2, 2>)
+  * \li \c Vector4f is a vector of 4 floats (\c Matrix<float, 4, 1>)
+  * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix<int, 1, 3>)
  *
-  * See \ref matrixtypedefs for an explicit list of all matrix typedefs.
+  * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix<float, Dynamic, Dynamic>)
+  * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix<float, Dynamic, 1>)
  *
-  * Of course these typedefs do not exhaust all the possibilities offered by the Matrix class
-  * template, they only address some of the most common cases. For instance, if you want a
-  * fixed-size matrix with 3 rows and 5 columns, there is no typedef for that, so you should use
-  * \c Matrix<double,3,5>.
+  * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs.
  *
-  * Note that most of the API is in the base class MatrixBase.
+  * You can access elements of vectors and matrices using normal subscripting:
+  *
+  * \code
+  * Eigen::VectorXd v(10);
+  * v[0] = 0.1;
+  * v[1] = 0.2;
+  * v(0) = 0.3;
+  * v(1) = 0.4;
+  *
+  * Eigen::MatrixXi m(10, 10);
+  * m(0, 1) = 1;
+  * m(0, 2) = 2;
+  * m(0, 3) = 3;
+  * \endcode
+  *
+  * <i><b>Some notes:</b></i>
+  *
+  * <dl>
+  * <dt><b>\anchor dense Dense versus sparse:</b></dt>
+  * <dd>This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module.
+  *
+  * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array.
+  * This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.</dd>
+  *
+  * <dt><b>\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>
+  * <dd>Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates the array
+  * of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up
+  * to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time.
+  *
+  * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime
+  * variables, and the array of coefficients is allocated dynamically on the heap.
+  *
+  * Note that \em dense matrices, be they Fixed-size or Dynamic-size, <em>do not</em> expand dynamically in the sense of a std::map.
+  * If you want this behavior, see the Sparse module.</dd>
+  *
+  * <dt><b>\anchor maxrows _MaxRows and _MaxCols:</b></dt>
+  * <dd>In most cases, one just leaves these parameters to the default values.
+  * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases
+  * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they cannot
+  * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols
+  * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
+  * </dl>
+  *
+  * \see MatrixBase for the majority of the API methods for matrices
  */
-template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
-struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols> >
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
 {
  typedef _Scalar Scalar;
  enum {
@@ -89,33 +113,35 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols
    ColsAtCompileTime = _Cols,
    MaxRowsAtCompileTime = _MaxRows,
    MaxColsAtCompileTime = _MaxCols,
-    Flags = ei_compute_matrix_flags<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols>::ret,
-    CoeffReadCost = NumTraits<Scalar>::ReadCost,
-    SupportedAccessPatterns = RandomAccessPattern
+    Flags = ei_compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+    CoeffReadCost = NumTraits<Scalar>::ReadCost
  };
 };

-template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 class Matrix
-  : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols> >
-    #ifdef EIGEN_VECTORIZE
-    , public ei_with_aligned_operator_new<_Scalar,ei_size_at_compile_time<_Rows,_Cols>::ret>
-    #endif
+  : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
 {
  public:
    EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
+    enum { Options = _Options };
    friend class Eigen::Map<Matrix, Unaligned>;
+    typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType;
    friend class Eigen::Map<Matrix, Aligned>;
+    typedef class Eigen::Map<Matrix, Aligned> AlignedMapType;

  protected:
-    ei_matrix_storage<Scalar, MaxSizeAtCompileTime, RowsAtCompileTime, ColsAtCompileTime> m_storage;
+    ei_matrix_storage<Scalar, MaxSizeAtCompileTime, RowsAtCompileTime, ColsAtCompileTime, Options> m_storage;

  public:
+    enum { NeedsToAlign = (!(Options&DontAlign))
+                          && SizeAtCompileTime!=Dynamic && ((sizeof(Scalar)*SizeAtCompileTime)%16)==0 };
+    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

-    inline int rows() const { return m_storage.rows(); }
-    inline int cols() const { return m_storage.cols(); }
+    EIGEN_STRONG_INLINE int rows() const { return m_storage.rows(); }
+    EIGEN_STRONG_INLINE int cols() const { return m_storage.cols(); }

-    inline int stride(void) const
+    EIGEN_STRONG_INLINE int stride(void) const
    {
      if(Flags & RowMajorBit)
        return m_storage.cols();
@@ -123,7 +149,7 @@ class Matrix
        return m_storage.rows();
    }

-    inline const Scalar& coeff(int row, int col) const
+    EIGEN_STRONG_INLINE const Scalar& coeff(int row, int col) const
    {
      if(Flags & RowMajorBit)
        return m_storage.data()[col + row * m_storage.cols()];
@@ -131,12 +157,12 @@ class Matrix
        return m_storage.data()[row + col * m_storage.rows()];
    }

-    inline const Scalar& coeff(int index) const
+    EIGEN_STRONG_INLINE const Scalar& coeff(int index) const
    {
      return m_storage.data()[index];
    }

-    inline Scalar& coeffRef(int row, int col)
+    EIGEN_STRONG_INLINE Scalar& coeffRef(int row, int col)
    {
      if(Flags & RowMajorBit)
        return m_storage.data()[col + row * m_storage.cols()];
@@ -144,13 +170,13 @@ class Matrix
        return m_storage.data()[row + col * m_storage.rows()];
    }

-    inline Scalar& coeffRef(int index)
+    EIGEN_STRONG_INLINE Scalar& coeffRef(int index)
    {
      return m_storage.data()[index];
    }

    template<int LoadMode>
-    inline PacketScalar packet(int row, int col) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int row, int col) const
    {
      return ei_ploadt<Scalar, LoadMode>
               (m_storage.data() + (Flags & RowMajorBit
@@ -159,13 +185,13 @@ class Matrix
    }

    template<int LoadMode>
-    inline PacketScalar packet(int index) const
+    EIGEN_STRONG_INLINE PacketScalar packet(int index) const
    {
      return ei_ploadt<Scalar, LoadMode>(m_storage.data() + index);
    }

    template<int StoreMode>
-    inline void writePacket(int row, int col, const PacketScalar& x)
+    EIGEN_STRONG_INLINE void writePacket(int row, int col, const PacketScalar& x)
    {
      ei_pstoret<Scalar, PacketScalar, StoreMode>
              (m_storage.data() + (Flags & RowMajorBit
@@ -174,30 +200,43 @@ class Matrix
    }

    template<int StoreMode>
-    inline void writePacket(int index, const PacketScalar& x)
+    EIGEN_STRONG_INLINE void writePacket(int index, const PacketScalar& x)
    {
      ei_pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, x);
    }

    /** \returns a const pointer to the data array of this matrix */
-    inline const Scalar *data() const
+    EIGEN_STRONG_INLINE const Scalar *data() const
    { return m_storage.data(); }

    /** \returns a pointer to the data array of this matrix */
-    inline Scalar *data()
+    EIGEN_STRONG_INLINE Scalar *data()
    { return m_storage.data(); }

+    /** Resizes \c *this to a \a rows x \a cols matrix.
+      *
+      * Makes sense for dynamic-size matrices only.
+      *
+      * If the current number of coefficients of \c *this exactly matches the
+      * product \a rows * \a cols, then no memory allocation is performed and
+      * the current values are left unchanged. In all other cases, including
+      * shrinking, the data is reallocated and all previous values are lost.
+      *
+      * \sa resize(int) for vectors.
+      */
    inline void resize(int rows, int cols)
    {
-      ei_assert(rows > 0
-          && (MaxRowsAtCompileTime == Dynamic || MaxRowsAtCompileTime >= rows)
-          && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
-          && cols > 0
-          && (MaxColsAtCompileTime == Dynamic || MaxColsAtCompileTime >= cols)
-          && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+      ei_assert((MaxRowsAtCompileTime == Dynamic || MaxRowsAtCompileTime >= rows)
+             && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+             && (MaxColsAtCompileTime == Dynamic || MaxColsAtCompileTime >= cols)
+             && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
      m_storage.resize(rows * cols, rows, cols);
    }

+    /** Resizes \c *this to a vector of length \a size
+      *
+      * \sa resize(int,int) for the details.
+      */
    inline void resize(int size)
    {
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
@@ -207,16 +246,15 @@ class Matrix
        m_storage.resize(size, size, 1);
    }

-    /** Copies the value of the expression \a other into *this.
+    /** Resizes *this to have the same dimensions as \a other.
+      * Takes care of doing all the checking that's needed.
      *
-      * *this is resized (if possible) to match the dimensions of \a other.
-      *
-      * As a special exception, copying a row-vector into a vector (and conversely)
-      * is allowed. The resizing, if any, is then done in the appropriate way so that
-      * row-vectors remain row-vectors and vectors remain vectors.
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
      */
    template<typename OtherDerived>
-    inline Matrix& operator=(const MatrixBase<OtherDerived>& other)
+    EIGEN_STRONG_INLINE void resizeLike(const MatrixBase<OtherDerived>& other)
    {
      if(RowsAtCompileTime == 1)
      {
@@ -229,17 +267,35 @@ class Matrix
        resize(other.size(), 1);
      }
      else resize(other.rows(), other.cols());
-      return Base::operator=(other.derived());
+    }
+
+    /** Copies the value of the expression \a other into \c *this with automatic resizing.
+      *
+      * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+      * it will be initialized.
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
+    {
+      return _set(other);
    }

    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
-    inline Matrix& operator=(const Matrix& other)
+    EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
    {
-      return operator=<Matrix>(other);
+      return  _set(other);
    }

+    template<typename OtherDerived,typename OtherEvalType>
+    EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived,OtherEvalType>& func)
+    { return Base::operator=(func); }
+
    EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Matrix, +=)
    EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Matrix, -=)
    EIGEN_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Matrix, *=)
@@ -249,78 +305,71 @@ class Matrix
      *
      * For fixed-size matrices, does nothing.
      *
-      * For dynamic-size matrices, initializes with initial size 1x1, which is inefficient, hence
-      * when performance matters one should avoid using this constructor on dynamic-size matrices.
+      * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
+      * is called a null matrix. This constructor is the unique way to create null matrices: resizing
+      * a matrix to 0 is not supported.
+      *
+      * \sa resize(int,int)
      */
-    inline explicit Matrix() : m_storage(1, 1, 1)
+    EIGEN_STRONG_INLINE explicit Matrix() : m_storage()
    {
-      ei_assert(RowsAtCompileTime > 0 && ColsAtCompileTime > 0);
+      _check_template_params();
    }

+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** \internal */
+    Matrix(ei_constructor_without_unaligned_array_assert)
+      : m_storage(ei_constructor_without_unaligned_array_assert())
+    {}
+#endif
+
    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
      *
      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
      * it is redundant to pass the dimension here, so it makes more sense to use the default
      * constructor Matrix() instead.
      */
-    inline explicit Matrix(int dim)
+    EIGEN_STRONG_INLINE explicit Matrix(int dim)
      : m_storage(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
    {
+      _check_template_params();
      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
      ei_assert(dim > 0);
      ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
    }

-    /** This constructor has two very different behaviors, depending on the type of *this.
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename T0, typename T1>
+    EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
+    {
+      _check_template_params();
+      _init2<T0,T1>(x, y);
+    }
+    #else
+    /** constructs an uninitialized matrix with \a rows rows and \a cols columns.
      *
-      * \li When Matrix is a fixed-size vector type of size 2, this constructor constructs
-      *     an initialized vector. The parameters \a x, \a y are copied into the first and second
-      *     coords of the vector respectively.
-      * \li Otherwise, this constructor constructs an uninitialized matrix with \a x rows and
-      *     \a y columns. This is useful for dynamic-size matrices. For fixed-size matrices,
-      *     it is redundant to pass these parameters, so one should use the default constructor
-      *     Matrix() instead.
-      */
-    inline Matrix(int x, int y) : m_storage(x*y, x, y)
-    {
-      if((RowsAtCompileTime == 1 && ColsAtCompileTime == 2)
-      || (RowsAtCompileTime == 2 && ColsAtCompileTime == 1))
-      {
-        m_storage.data()[0] = Scalar(x);
-        m_storage.data()[1] = Scalar(y);
-      }
-      else
-      {
-        ei_assert(x > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == x)
-               && y > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == y));
-      }
-    }
+      * This is useful for dynamic-size matrices. For fixed-size matrices,
+      * it is redundant to pass these parameters, so one should use the default constructor
+      * Matrix() instead. */
+    Matrix(int rows, int cols);
    /** constructs an initialized 2D vector with given coefficients */
-    inline Matrix(const float& x, const float& y)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 2);
-      m_storage.data()[0] = x;
-      m_storage.data()[1] = y;
-    }
-    /** constructs an initialized 2D vector with given coefficients */
-    inline Matrix(const double& x, const double& y)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 2);
-      m_storage.data()[0] = x;
-      m_storage.data()[1] = y;
-    }
+    Matrix(const Scalar& x, const Scalar& y);
+    #endif
+
    /** constructs an initialized 3D vector with given coefficients */
-    inline Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
+    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3);
+      _check_template_params();
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
      m_storage.data()[0] = x;
      m_storage.data()[1] = y;
      m_storage.data()[2] = z;
    }
    /** constructs an initialized 4D vector with given coefficients */
-    inline Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
+    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4);
+      _check_template_params();
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
      m_storage.data()[0] = x;
      m_storage.data()[1] = y;
      m_storage.data()[2] = z;
@@ -331,33 +380,30 @@ class Matrix

    /** Constructor copying the value of the expression \a other */
    template<typename OtherDerived>
-    inline Matrix(const MatrixBase<OtherDerived>& other)
+    EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
             : m_storage(other.rows() * other.cols(), other.rows(), other.cols())
    {
-      ei_assign_selector<Matrix,OtherDerived,false>::run(*this, other.derived());
-      //Base::operator=(other.derived());
+      _check_template_params();
+      _set_noalias(other);
    }
    /** Copy constructor */
-    inline Matrix(const Matrix& other)
+    EIGEN_STRONG_INLINE Matrix(const Matrix& other)
            : Base(), m_storage(other.rows() * other.cols(), other.rows(), other.cols())
    {
-      Base::lazyAssign(other);
+      _check_template_params();
+      _set_noalias(other);
    }
+    /** Copy constructor with in-place evaluation */
+    template<typename OtherDerived,typename OtherEvalType>
+    EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived,OtherEvalType>& other)
+    { other.evalTo(*this); }
    /** Destructor */
    inline ~Matrix() {}

-    /** Override MatrixBase::eval() since matrices don't need to be evaluated, it is enough to just read them.
-      * This prevents a useless copy when doing e.g. "m1 = m2.eval()"
-      */
-    const Matrix& eval() const
-    {
-      return *this;
-    }
-
    /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the
      * data pointers.
      */
-    void swap(Matrix& other)
+    inline void swap(Matrix& other)
    {
      if (Base::SizeAtCompileTime==Dynamic)
        m_storage.swap(other.m_storage);
@@ -365,12 +411,147 @@ class Matrix
        this->Base::swap(other);
    }

+    /** \name Map
+      * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects,
+      * while the MapAligned() functions return aligned Map objects and thus should be called only with 16-byte-aligned
+      * \a data pointers.
+      *
+      * \see class Map
+      */
+    //@{
+    inline static const UnalignedMapType Map(const Scalar* data)
+    { return UnalignedMapType(data); }
+    inline static UnalignedMapType Map(Scalar* data)
+    { return UnalignedMapType(data); }
+    inline static const UnalignedMapType Map(const Scalar* data, int size)
+    { return UnalignedMapType(data, size); }
+    inline static UnalignedMapType Map(Scalar* data, int size)
+    { return UnalignedMapType(data, size); }
+    inline static const UnalignedMapType Map(const Scalar* data, int rows, int cols)
+    { return UnalignedMapType(data, rows, cols); }
+    inline static UnalignedMapType Map(Scalar* data, int rows, int cols)
+    { return UnalignedMapType(data, rows, cols); }
+
+    inline static const AlignedMapType MapAligned(const Scalar* data)
+    { return AlignedMapType(data); }
+    inline static AlignedMapType MapAligned(Scalar* data)
+    { return AlignedMapType(data); }
+    inline static const AlignedMapType MapAligned(const Scalar* data, int size)
+    { return AlignedMapType(data, size); }
+    inline static AlignedMapType MapAligned(Scalar* data, int size)
+    { return AlignedMapType(data, size); }
+    inline static const AlignedMapType MapAligned(const Scalar* data, int rows, int cols)
+    { return AlignedMapType(data, rows, cols); }
+    inline static AlignedMapType MapAligned(Scalar* data, int rows, int cols)
+    { return AlignedMapType(data, rows, cols); }
+    //@}
+
+    using Base::setConstant;
+    Matrix& setConstant(int size, const Scalar& value);
+    Matrix& setConstant(int rows, int cols, const Scalar& value);
+
+    using Base::setZero;
+    Matrix& setZero(int size);
+    Matrix& setZero(int rows, int cols);
+
+    using Base::setOnes;
+    Matrix& setOnes(int size);
+    Matrix& setOnes(int rows, int cols);
+
+    using Base::setRandom;
+    Matrix& setRandom(int size);
+    Matrix& setRandom(int rows, int cols);
+
+    using Base::setIdentity;
+    Matrix& setIdentity(int rows, int cols);
+
 /////////// Geometry module ///////////

    template<typename OtherDerived>
    explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
    template<typename OtherDerived>
    Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+
+    // allow to extend Matrix outside Eigen
+    #ifdef EIGEN_MATRIX_PLUGIN
+    #include EIGEN_MATRIX_PLUGIN
+    #endif
+
+  private:
+    /** \internal Resizes *this in preparation for assigning \a other to it.
+      * Takes care of doing all the checking that's needed.
+      *
+      * When EIGEN_NO_AUTOMATIC_RESIZING is defined, first asserts that *this is either empty
+      * (size()==0) or already has the size of \a other: the same number of coefficients when
+      * IsVectorAtCompileTime is true, the same numbers of rows and columns otherwise.
+      * In all cases the actual resizing is then delegated to resizeLike().
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE void _resize_to_match(const MatrixBase<OtherDerived>& other)
+    {
+      #ifdef EIGEN_NO_AUTOMATIC_RESIZING
+      ei_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
+                 : (rows() == other.rows() && cols() == other.cols())))
+        && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+      #endif
+      resizeLike(other);
+    }
+
+    /** \internal Copies the value of the expression \a other into \c *this with automatic resizing.
+      *
+      * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+      * it will be initialized.
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      *
+      * \sa operator=(const MatrixBase<OtherDerived>&), _set_noalias()
+      */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE Matrix& _set(const MatrixBase<OtherDerived>& other)
+    {
+      _resize_to_match(other);
+      return Base::operator=(other);
+    }
+
+    /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
+      * is the case when creating a new matrix) so one can enforce lazy evaluation.
+      *
+      * \sa operator=(const MatrixBase<OtherDerived>&), _set()
+      */
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE Matrix& _set_noalias(const MatrixBase<OtherDerived>& other)
+    {
+      _resize_to_match(other);
+      // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
+      // it wouldn't allow to copy a row-vector into a column-vector.
+      return ei_assign_selector<Matrix,OtherDerived,false>::run(*this, other.derived());
+    }
+
+    /** \internal Compile-time sanity check of the Matrix template parameters.
+      *
+      * Triggers a static assertion (INVALID_MATRIX_TEMPLATE_PARAMETERS) unless all of the following hold:
+      *  - _MaxRows >= _Rows, or _Rows is Dynamic;
+      *  - _MaxCols >= _Cols, or _Cols is Dynamic;
+      *  - the product of _MaxRows and _MaxCols (a Dynamic value counting as 1) is less than Dynamic;
+      *  - _Options has no bits set other than DontAlign and RowMajor.
+      */
+    static EIGEN_STRONG_INLINE void _check_template_params()
+    {
+        EIGEN_STATIC_ASSERT(((_MaxRows >= _Rows || _Rows==Dynamic)
+                          && (_MaxCols >= _Cols || _Cols==Dynamic)
+                          && ((_MaxRows==Dynamic?1:_MaxRows)*(_MaxCols==Dynamic?1:_MaxCols)<Dynamic)
+                          && (_Options & (DontAlign|RowMajor)) == _Options),
+          INVALID_MATRIX_TEMPLATE_PARAMETERS)
+    }
+
+
+    /** \internal Helper for the two-argument constructor; overload declared with the ei_enable_if
+      * condition SizeAtCompileTime!=2. The arguments are taken as \a rows and \a cols: they are
+      * asserted to be positive and to agree with any fixed compile-time dimension, then the storage
+      * is resized to rows*cols coefficients.
+      * NOTE(review): ei_enable_if is presumably a SFINAE helper that removes this overload when the
+      * condition is false -- confirm against its definition.
+      */
+    template<typename T0, typename T1>
+    EIGEN_STRONG_INLINE void _init2(int rows, int cols, typename ei_enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
+    {
+      ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+             && cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+      m_storage.resize(rows*cols,rows,cols);
+    }
+    /** \internal Helper for the two-argument constructor; overload declared with the ei_enable_if
+      * condition SizeAtCompileTime==2. Statically asserts that Matrix is a vector of size 2 and
+      * stores \a x and \a y as its first and second coefficients.
+      */
+    template<typename T0, typename T1>
+    EIGEN_STRONG_INLINE void _init2(const Scalar& x, const Scalar& y, typename ei_enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 2)
+      m_storage.data()[0] = x;
+      m_storage.data()[1] = y;
+    }
 };

 /** \defgroup matrixtypedefs Global matrix typedefs
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -1,7 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -33,7 +34,7 @@
  * types. Most of the Eigen API is contained in this class. Other important classes for
  * the Eigen API are Matrix, Cwise, and PartialRedux.
  *
-  * Note that some methods are defined in the \ref Array module.
+  * Note that some methods are defined in the \ref Array_Module array module.
  *
  * \param Derived is the derived type, e.g. a matrix type, or an expression, etc.
  *
@@ -52,13 +53,22 @@
  *
  */
 template<typename Derived> class MatrixBase
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  : public ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
+                typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>
+#endif // not EIGEN_PARSED_BY_DOXYGEN
 {
  public:

+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
+                typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*;
+
    class InnerIterator;

    typedef typename ei_traits<Derived>::Scalar Scalar;
    typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+#endif // not EIGEN_PARSED_BY_DOXYGEN

    enum {

@@ -139,6 +149,7 @@ template<typename Derived> class MatrixBase
      ei_assert(ei_are_flags_consistent<Flags>::ret);
    }

+#ifndef EIGEN_PARSED_BY_DOXYGEN
    /** This is the "real scalar" type; if the \a Scalar type is already real numbers
      * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If
      * \a Scalar is \a std::complex<T> then RealScalar is \a T.
@@ -150,17 +161,21 @@ template<typename Derived> class MatrixBase
    /** type of the equivalent square matrix */
    typedef Matrix<Scalar,EIGEN_ENUM_MAX(RowsAtCompileTime,ColsAtCompileTime),
                          EIGEN_ENUM_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN

    /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
    inline int rows() const { return derived().rows(); }
    /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
    inline int cols() const { return derived().cols(); }
-    /** \returns the number of coefficients, which is \a rows()*cols().
+    /** \returns the number of coefficients, which is rows()*cols().
      * \sa rows(), cols(), SizeAtCompileTime. */
    inline int size() const { return rows() * cols(); }
+    /** \returns the size of the main diagonal, which is min(rows(),cols()).
+      * \sa rows(), cols(), SizeAtCompileTime. */
+    inline int diagonalSize() const { return std::min(rows(),cols()); }
    /** \returns the number of nonzero coefficients which is in practice the number
      * of stored coefficients. */
-    inline int nonZeros() const { return derived.nonZeros(); }
+    inline int nonZeros() const { return derived().nonZeros(); }
    /** \returns true if either the number of rows or the number of columns is equal to 1.
      * In other words, this function returns
      * \code rows()==1 || cols()==1 \endcode
@@ -173,9 +188,25 @@ template<typename Derived> class MatrixBase
      * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */
    int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); }

-    /** \internal the type to which the expression gets evaluated (needed by MSVC) */
-    typedef typename ei_eval<Derived>::type EvalType;
-    /** \internal Represents a constant matrix */
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** \internal the plain matrix type corresponding to this expression. Note that this is not necessarily
+      * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
+      * reference to a matrix, not a matrix! It is guaranteed, however, that the return type of eval() is either
+      * PlainMatrixType or const PlainMatrixType&.
+      */
+    typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType;
+    /** \internal the column-major plain matrix type corresponding to this expression. Note that this is not necessarily
+      * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
+      * reference to a matrix, not a matrix!
+      * The only difference from PlainMatrixType is that PlainMatrixType_ColMajor is guaranteed to be column-major.
+      */
+    typedef typename ei_plain_matrix_type_column_major<Derived>::type PlainMatrixType_ColMajor;
+
+    /** \internal the return type of coeff()
+      */
+    typedef typename ei_meta_if<bool(int(Flags)&DirectAccessBit), const Scalar&, Scalar>::ret CoeffReturnType;
+
+    /** \internal Represents a matrix with all coefficients equal to one another*/
    typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
    /** \internal Represents a scalar multiple of a matrix */
    typedef CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> ScalarMultipleReturnType;
@@ -188,8 +219,10 @@ template<typename Derived> class MatrixBase
                     >::ret ConjugateReturnType;
    /** \internal the return type of MatrixBase::real() */
    typedef CwiseUnaryOp<ei_scalar_real_op<Scalar>, Derived> RealReturnType;
+    /** \internal the return type of MatrixBase::imag() */
+    typedef CwiseUnaryOp<ei_scalar_imag_op<Scalar>, Derived> ImagReturnType;
    /** \internal the return type of MatrixBase::adjoint() */
-    typedef Transpose<NestByValue<typename ei_cleantype<ConjugateReturnType>::type> >
+    typedef Eigen::Transpose<NestByValue<typename ei_cleantype<ConjugateReturnType>::type> >
            AdjointReturnType;
    /** \internal the return type of MatrixBase::eigenvalues() */
    typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
@@ -203,16 +236,13 @@ template<typename Derived> class MatrixBase
    typedef Block<CwiseNullaryOp<ei_scalar_identity_op<Scalar>, SquareMatrixType>,
                  ei_traits<Derived>::RowsAtCompileTime,
                  ei_traits<Derived>::ColsAtCompileTime> BasisReturnType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN


    /** Copies \a other into *this. \returns a reference to *this. */
    template<typename OtherDerived>
    Derived& operator=(const MatrixBase<OtherDerived>& other);

-    /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
-    template<typename OtherDerived>
-    Derived& lazyAssign(const MatrixBase<OtherDerived>& other);
-
    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
@@ -221,6 +251,11 @@ template<typename Derived> class MatrixBase
      return this->operator=<Derived>(other);
    }

+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
+    template<typename OtherDerived>
+    Derived& lazyAssign(const MatrixBase<OtherDerived>& other);
+
    /** Overloaded for cache friendly product evaluation */
    template<typename Lhs, typename Rhs>
    Derived& lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product);
@@ -229,30 +264,28 @@ template<typename Derived> class MatrixBase
    template<typename OtherDerived>
    Derived& lazyAssign(const Flagged<OtherDerived, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
    { return lazyAssign(other._expression()); }
-
-    /** Overloaded for sparse product evaluation */
-    template<typename Derived1, typename Derived2>
-    Derived& lazyAssign(const Product<Derived1,Derived2,SparseProduct>& product);
+#endif // not EIGEN_PARSED_BY_DOXYGEN

    CommaInitializer<Derived> operator<< (const Scalar& s);

    template<typename OtherDerived>
    CommaInitializer<Derived> operator<< (const MatrixBase<OtherDerived>& other);

-    const Scalar coeff(int row, int col) const;
-    const Scalar operator()(int row, int col) const;
+    const CoeffReturnType coeff(int row, int col) const;
+    const CoeffReturnType operator()(int row, int col) const;

    Scalar& coeffRef(int row, int col);
    Scalar& operator()(int row, int col);

-    const Scalar coeff(int index) const;
-    const Scalar operator[](int index) const;
-    const Scalar operator()(int index) const;
+    const CoeffReturnType coeff(int index) const;
+    const CoeffReturnType operator[](int index) const;
+    const CoeffReturnType operator()(int index) const;

    Scalar& coeffRef(int index);
    Scalar& operator[](int index);
    Scalar& operator()(int index);

+#ifndef EIGEN_PARSED_BY_DOXYGEN
    template<typename OtherDerived>
    void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other);
    template<typename OtherDerived>
@@ -261,6 +294,7 @@ template<typename Derived> class MatrixBase
    void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other);
    template<typename OtherDerived, int StoreMode, int LoadMode>
    void copyPacket(int index, const MatrixBase<OtherDerived>& other);
+#endif // not EIGEN_PARSED_BY_DOXYGEN

    template<int LoadMode>
    PacketScalar packet(int row, int col) const;
@@ -272,10 +306,10 @@ template<typename Derived> class MatrixBase
    template<int StoreMode>
    void writePacket(int index, const PacketScalar& x);

-    const Scalar x() const;
-    const Scalar y() const;
-    const Scalar z() const;
-    const Scalar w() const;
+    const CoeffReturnType x() const;
+    const CoeffReturnType y() const;
+    const CoeffReturnType z() const;
+    const CoeffReturnType w() const;
    Scalar& x();
    Scalar& y();
    Scalar& z();
@@ -304,6 +338,9 @@ template<typename Derived> class MatrixBase
    Derived& operator/=(const Scalar& other);

    const ScalarMultipleReturnType operator*(const Scalar& scalar) const;
+    #ifdef EIGEN_PARSED_BY_DOXYGEN
+    const ScalarMultipleReturnType operator*(const RealScalar& scalar) const;
+    #endif
    const CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>, Derived>
    operator/(const Scalar& scalar) const;

@@ -320,23 +357,26 @@ template<typename Derived> class MatrixBase
    Derived& operator*=(const MatrixBase<OtherDerived>& other);

    template<typename OtherDerived>
-    typename OtherDerived::Eval solveTriangular(const MatrixBase<OtherDerived>& other) const;
+    typename ei_plain_matrix_type_column_major<OtherDerived>::type
+		solveTriangular(const MatrixBase<OtherDerived>& other) const;

    template<typename OtherDerived>
-    void solveTriangularInPlace(MatrixBase<OtherDerived>& other) const;
+    void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;


    template<typename OtherDerived>
    Scalar dot(const MatrixBase<OtherDerived>& other) const;
-    RealScalar norm2() const;
+    RealScalar squaredNorm() const;
    RealScalar norm()  const;
-    const EvalType normalized() const;
+    RealScalar stableNorm()  const;
+    const PlainMatrixType normalized() const;
    void normalize();

-    Transpose<Derived> transpose();
-    const Transpose<Derived> transpose() const;
+    Eigen::Transpose<Derived> transpose();
+    const Eigen::Transpose<Derived> transpose() const;
+    void transposeInPlace();
    const AdjointReturnType adjoint() const;
-
+    void adjointInPlace();

    RowXpr row(int i);
    const RowXpr row(int i) const;
@@ -351,8 +391,8 @@ template<typename Derived> class MatrixBase
    const typename BlockReturnType<Derived>::Type
    block(int startRow, int startCol, int blockRows, int blockCols) const;

-    typename BlockReturnType<Derived>::SubVectorType block(int start, int size);
-    const typename BlockReturnType<Derived>::SubVectorType block(int start, int size) const;
+    typename BlockReturnType<Derived>::SubVectorType segment(int start, int size);
+    const typename BlockReturnType<Derived>::SubVectorType segment(int start, int size) const;

    typename BlockReturnType<Derived,Dynamic>::SubVectorType start(int size);
    const typename BlockReturnType<Derived,Dynamic>::SubVectorType start(int size) const;
@@ -379,12 +419,18 @@ template<typename Derived> class MatrixBase
    template<int Size> typename BlockReturnType<Derived,Size>::SubVectorType end();
    template<int Size> const typename BlockReturnType<Derived,Size>::SubVectorType end() const;

-    template<int Size> typename BlockReturnType<Derived,Size>::SubVectorType block(int start);
-    template<int Size> const typename BlockReturnType<Derived,Size>::SubVectorType block(int start) const;
+    template<int Size> typename BlockReturnType<Derived,Size>::SubVectorType segment(int start);
+    template<int Size> const typename BlockReturnType<Derived,Size>::SubVectorType segment(int start) const;

-    DiagonalCoeffs<Derived> diagonal();
-    const DiagonalCoeffs<Derived> diagonal() const;
+    Diagonal<Derived,0> diagonal();
+    const Diagonal<Derived,0> diagonal() const;

+    template<int Index> Diagonal<Derived,Index> diagonal();
+    template<int Index> const Diagonal<Derived,Index> diagonal() const;
+    
+    Diagonal<Derived, Dynamic> diagonal(int index);
+    const Diagonal<Derived, Dynamic> diagonal(int index) const;
+    
    template<unsigned int Mode> Part<Derived, Mode> part();
    template<unsigned int Mode> const Part<Derived, Mode> part() const;

@@ -421,8 +467,9 @@ template<typename Derived> class MatrixBase
    static const BasisReturnType UnitZ();
    static const BasisReturnType UnitW();

-    const DiagonalMatrix<Derived> asDiagonal() const;
+    const DiagonalMatrixWrapper<Derived> asDiagonal() const;

+    void fill(const Scalar& value);
    Derived& setConstant(const Scalar& value);
    Derived& setZero();
    Derived& setOnes();
@@ -440,13 +487,14 @@ template<typename Derived> class MatrixBase
                           RealScalar prec = precision<Scalar>()) const;

    bool isApproxToConstant(const Scalar& value, RealScalar prec = precision<Scalar>()) const;
+    bool isConstant(const Scalar& value, RealScalar prec = precision<Scalar>()) const;
    bool isZero(RealScalar prec = precision<Scalar>()) const;
    bool isOnes(RealScalar prec = precision<Scalar>()) const;
    bool isIdentity(RealScalar prec = precision<Scalar>()) const;
    bool isDiagonal(RealScalar prec = precision<Scalar>()) const;

-    bool isUpper(RealScalar prec = precision<Scalar>()) const;
-    bool isLower(RealScalar prec = precision<Scalar>()) const;
+    bool isUpperTriangular(RealScalar prec = precision<Scalar>()) const;
+    bool isLowerTriangular(RealScalar prec = precision<Scalar>()) const;

    template<typename OtherDerived>
    bool isOrthogonal(const MatrixBase<OtherDerived>& other,
@@ -463,15 +511,19 @@ template<typename Derived> class MatrixBase


    template<typename NewType>
-    const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived> cast() const;
-
+    typename ei_cast_return_type<
+        Derived,
+        const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
+      >::type
+    cast() const;
+           
    /** \returns the matrix or vector obtained by evaluating this expression.
      *
+      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
+      * a const reference, in order to avoid a useless copy.
      */
-    EIGEN_ALWAYS_INLINE const typename ei_eval<Derived>::type eval() const
-    {
-      return typename ei_eval<Derived>::type(derived());
-    }
+    EIGEN_STRONG_INLINE const typename ei_eval<Derived>::type eval() const
+    { return typename ei_eval<Derived>::type(derived()); }

    template<typename OtherDerived>
    void swap(const MatrixBase<OtherDerived>& other);
@@ -492,6 +544,7 @@ template<typename Derived> class MatrixBase

    ConjugateReturnType conjugate() const;
    const RealReturnType real() const;
+    const ImagReturnType imag() const;

    template<typename CustomUnaryOp>
    const CwiseUnaryOp<CustomUnaryOp, Derived> unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const;
@@ -504,6 +557,8 @@ template<typename Derived> class MatrixBase
    Scalar sum() const;
    Scalar trace() const;

+    Scalar prod() const;
+
    typename ei_traits<Derived>::Scalar minCoeff() const;
    typename ei_traits<Derived>::Scalar maxCoeff() const;

@@ -517,11 +572,12 @@ template<typename Derived> class MatrixBase
    template<typename Visitor>
    void visit(Visitor& func) const;

-
+#ifndef EIGEN_PARSED_BY_DOXYGEN
    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
    inline Derived& derived() { return *static_cast<Derived*>(this); }
    inline Derived& const_cast_derived() const
    { return *static_cast<Derived*>(const_cast<MatrixBase*>(this)); }
+#endif // not EIGEN_PARSED_BY_DOXYGEN

    const Cwise<Derived> cwise() const;
    Cwise<Derived> cwise();
@@ -532,6 +588,7 @@ template<typename Derived> class MatrixBase

    bool all(void) const;
    bool any(void) const;
+    int count() const;

    const PartialRedux<Derived,Horizontal> rowwise() const;
    const PartialRedux<Derived,Vertical> colwise() const;
@@ -544,44 +601,83 @@ template<typename Derived> class MatrixBase
    const Select<Derived,ThenDerived,ElseDerived>
    select(const MatrixBase<ThenDerived>& thenMatrix,
           const MatrixBase<ElseDerived>& elseMatrix) const;
-   
+
    template<typename ThenDerived>
    inline const Select<Derived,ThenDerived, NestByValue<typename ThenDerived::ConstantReturnType> >
    select(const MatrixBase<ThenDerived>& thenMatrix, typename ThenDerived::Scalar elseScalar) const;
-    
+
    template<typename ElseDerived>
    inline const Select<Derived, NestByValue<typename ElseDerived::ConstantReturnType>, ElseDerived >
    select(typename ElseDerived::Scalar thenScalar, const MatrixBase<ElseDerived>& elseMatrix) const;

+    template<int p> RealScalar lpNorm() const;
+
+    template<int RowFactor, int ColFactor>
+    const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+    const Replicate<Derived,Dynamic,Dynamic> replicate(int rowFacor,int colFactor) const;
+
+    Eigen::Reverse<Derived, BothDirections> reverse();
+    const Eigen::Reverse<Derived, BothDirections> reverse() const;
+    void reverseInPlace();
+
 /////////// LU module ///////////

-    const LU<EvalType> lu() const;
-    const EvalType inverse() const;
-    void computeInverse(EvalType *result) const;
+    const LU<PlainMatrixType> lu() const;
+    const PartialLU<PlainMatrixType> partialLu() const;    
+    const PlainMatrixType inverse() const;
+    void computeInverse(PlainMatrixType *result) const;
    Scalar determinant() const;

 /////////// Cholesky module ///////////

-    const Cholesky<EvalType> cholesky() const;
-    const CholeskyWithoutSquareRoot<EvalType> choleskyNoSqrt() const;
+    const LLT<PlainMatrixType>  llt() const;
+    const LDLT<PlainMatrixType> ldlt() const;

 /////////// QR module ///////////

-    const QR<EvalType> qr() const;
+    const QR<PlainMatrixType> qr() const;

    EigenvaluesReturnType eigenvalues() const;
    RealScalar operatorNorm() const;

 /////////// SVD module ///////////

-    SVD<EvalType> svd() const;
+    SVD<PlainMatrixType> svd() const;

 /////////// Geometry module ///////////

    template<typename OtherDerived>
-    EvalType cross(const MatrixBase<OtherDerived>& other) const;
-    EvalType unitOrthogonal(void) const;
-    
+    PlainMatrixType cross(const MatrixBase<OtherDerived>& other) const;
+    template<typename OtherDerived>
+    PlainMatrixType cross3(const MatrixBase<OtherDerived>& other) const;
+    PlainMatrixType unitOrthogonal(void) const;
+    Matrix<Scalar,3,1> eulerAngles(int a0, int a1, int a2) const;
+    const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
+    enum {
+      SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
+    };
+    typedef Block<Derived,
+                  ei_traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
+                  ei_traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> StartMinusOne;
+    typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>,
+                NestByValue<StartMinusOne> > HNormalizedReturnType;
+
+    const HNormalizedReturnType hnormalized() const;
+    typedef Homogeneous<Derived,MatrixBase<Derived>::ColsAtCompileTime==1?Vertical:Horizontal> HomogeneousReturnType;
+    const HomogeneousReturnType homogeneous() const;
+
+/////////// Sparse module ///////////
+
+    // dense = sparse * dense
+    template<typename Derived1, typename Derived2>
+    Derived& lazyAssign(const SparseProduct<Derived1,Derived2,SparseTimeDenseProduct>& product);
+    // dense = dense * sparse
+    template<typename Derived1, typename Derived2>
+    Derived& lazyAssign(const SparseProduct<Derived1,Derived2,DenseTimeSparseProduct>& product);
+
+    template<typename OtherDerived,typename OtherEvalType>
+    Derived& operator=(const ReturnByValue<OtherDerived,OtherEvalType>& func);
+
    #ifdef EIGEN_MATRIXBASE_PLUGIN
    #include EIGEN_MATRIXBASE_PLUGIN
    #endif
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,6 +26,35 @@
 #ifndef EIGEN_MATRIXSTORAGE_H
 #define EIGEN_MATRIXSTORAGE_H

+struct ei_constructor_without_unaligned_array_assert {};
+
+/** \internal
+  * Static array automatically aligned if the total byte size is a multiple of 16 and the matrix options require auto alignment
+  */
+template <typename T, int Size, int MatrixOptions,
+          bool Align = (!(MatrixOptions&DontAlign)) && (((Size*sizeof(T))&0xf)==0)
+> struct ei_matrix_array
+{
+  EIGEN_ALIGN_128 T array[Size];
+
+  ei_matrix_array()
+  {
+    #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+    ei_assert((reinterpret_cast<size_t>(array) & 0xf) == 0
+              && "this assertion is explained here: http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html  **** READ THIS WEB PAGE !!! ****");
+    #endif
+  }
+
+  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
+};
+
+template <typename T, int Size, int MatrixOptions> struct ei_matrix_array<T,Size,MatrixOptions,false>
+{
+  T array[Size];
+  ei_matrix_array() {}
+  ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
+};
+
 /** \internal
  *
  * \class ei_matrix_storage
@@ -37,14 +66,16 @@
  *
  * \sa Matrix
  */
-template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage;
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matrix_storage;

 // purely fixed-size matrix
-template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matrix_storage
 {
-    ei_aligned_array<T,Size,((Size*sizeof(T))%16)==0> m_data;
+    ei_matrix_array<T,Size,_Options> m_data;
  public:
-    inline ei_matrix_storage() {}
+    inline explicit ei_matrix_storage() {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
+      : m_data(ei_constructor_without_unaligned_array_assert()) {}
    inline ei_matrix_storage(int,int,int) {}
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); }
    inline static int rows(void) {return _Rows;}
@@ -54,13 +85,31 @@ template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage
    inline T *data() { return m_data.array; }
 };

-// dynamic-size matrix with fixed-size storage
-template<typename T, int Size> class ei_matrix_storage<T, Size, Dynamic, Dynamic>
+// null matrix
+template<typename T, int _Rows, int _Cols, int _Options> class ei_matrix_storage<T, 0, _Rows, _Cols, _Options>
 {
-    ei_aligned_array<T,Size,((Size*sizeof(T))%16)==0> m_data;
+  public:
+    inline explicit ei_matrix_storage() {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) {}
+    inline ei_matrix_storage(int,int,int) {}
+    inline void swap(ei_matrix_storage& other) {}
+    inline static int rows(void) {return _Rows;}
+    inline static int cols(void) {return _Cols;}
+    inline void resize(int,int,int) {}
+    inline const T *data() const { return 0; }
+    inline T *data() { return 0; }
+};
+
+// dynamic-size matrix with fixed-size storage
+template<typename T, int Size, int _Options> class ei_matrix_storage<T, Size, Dynamic, Dynamic, _Options>
+{
+    ei_matrix_array<T,Size,_Options> m_data;
    int m_rows;
    int m_cols;
  public:
+    inline explicit ei_matrix_storage() : m_rows(0), m_cols(0) {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
+      : m_data(ei_constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
    inline ei_matrix_storage(int, int rows, int cols) : m_rows(rows), m_cols(cols) {}
    inline ~ei_matrix_storage() {}
    inline void swap(ei_matrix_storage& other)
@@ -77,11 +126,14 @@ template<typename T, int Size> class ei_matrix_storage<T, Size, Dynamic, Dynamic
 };

 // dynamic-size matrix with fixed-size storage and fixed width
-template<typename T, int Size, int _Cols> class ei_matrix_storage<T, Size, Dynamic, _Cols>
+template<typename T, int Size, int _Cols, int _Options> class ei_matrix_storage<T, Size, Dynamic, _Cols, _Options>
 {
-    ei_aligned_array<T,Size,((Size*sizeof(T))%16)==0> m_data;
+    ei_matrix_array<T,Size,_Options> m_data;
    int m_rows;
  public:
+    inline explicit ei_matrix_storage() : m_rows(0) {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
+      : m_data(ei_constructor_without_unaligned_array_assert()), m_rows(0) {}
    inline ei_matrix_storage(int, int rows, int) : m_rows(rows) {}
    inline ~ei_matrix_storage() {}
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
@@ -96,17 +148,20 @@ template<typename T, int Size, int _Cols> class ei_matrix_storage<T, Size, Dynam
 };

 // dynamic-size matrix with fixed-size storage and fixed height
-template<typename T, int Size, int _Rows> class ei_matrix_storage<T, Size, _Rows, Dynamic>
+template<typename T, int Size, int _Rows, int _Options> class ei_matrix_storage<T, Size, _Rows, Dynamic, _Options>
 {
-    ei_aligned_array<T,Size,((Size*sizeof(T))%16)==0> m_data;
+    ei_matrix_array<T,Size,_Options> m_data;
    int m_cols;
  public:
+    inline explicit ei_matrix_storage() : m_cols(0) {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
+      : m_data(ei_constructor_without_unaligned_array_assert()), m_cols(0) {}
    inline ei_matrix_storage(int, int, int cols) : m_cols(cols) {}
    inline ~ei_matrix_storage() {}
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
    inline int rows(void) const {return _Rows;}
    inline int cols(void) const {return m_cols;}
-    inline void resize(int size, int, int cols)
+    inline void resize(int, int, int cols)
    {
      m_cols = cols;
    }
@@ -115,15 +170,18 @@ template<typename T, int Size, int _Rows> class ei_matrix_storage<T, Size, _Rows
 };

 // purely dynamic matrix.
-template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
+template<typename T, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic, _Options>
 {
    T *m_data;
    int m_rows;
    int m_cols;
  public:
+    inline explicit ei_matrix_storage() : m_data(0), m_rows(0), m_cols(0) {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert)
+       : m_data(0), m_rows(0), m_cols(0) {}
    inline ei_matrix_storage(int size, int rows, int cols)
-      : m_data(ei_aligned_malloc<T>(size)), m_rows(rows), m_cols(cols) {}
-    inline ~ei_matrix_storage() { ei_aligned_free(m_data); }
+      : m_data(ei_aligned_new<T>(size)), m_rows(rows), m_cols(cols) {}
+    inline ~ei_matrix_storage() { ei_aligned_delete(m_data, m_rows*m_cols); }
    inline void swap(ei_matrix_storage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
    inline int rows(void) const {return m_rows;}
@@ -132,8 +190,11 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
    {
      if(size != m_rows*m_cols)
      {
-        ei_aligned_free(m_data);
-        m_data = ei_aligned_malloc<T>(size);
+        ei_aligned_delete(m_data, m_rows*m_cols);
+        if (size)
+          m_data = ei_aligned_new<T>(size);
+        else
+          m_data = 0;
      }
      m_rows = rows;
      m_cols = cols;
@@ -143,13 +204,15 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
 };

 // matrix with dynamic width and fixed height (so that matrix has dynamic size).
-template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynamic>
+template<typename T, int _Rows, int _Options> class ei_matrix_storage<T, Dynamic, _Rows, Dynamic, _Options>
 {
    T *m_data;
    int m_cols;
  public:
-    inline ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_malloc<T>(size)), m_cols(cols) {}
-    inline ~ei_matrix_storage() { ei_aligned_free(m_data); }
+    inline explicit ei_matrix_storage() : m_data(0), m_cols(0) {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+    inline ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_new<T>(size)), m_cols(cols) {}
+    inline ~ei_matrix_storage() { ei_aligned_delete(m_data, _Rows*m_cols); }
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
    inline static int rows(void) {return _Rows;}
    inline int cols(void) const {return m_cols;}
@@ -157,8 +220,11 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
    {
      if(size != _Rows*m_cols)
      {
-        ei_aligned_free(m_data);
-        m_data = ei_aligned_malloc<T>(size);
+        ei_aligned_delete(m_data, _Rows*m_cols);
+        if (size)
+          m_data = ei_aligned_new<T>(size);
+        else
+          m_data = 0;
      }
      m_cols = cols;
    }
@@ -167,13 +233,15 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
 };

 // matrix with dynamic height and fixed width (so that matrix has dynamic size).
-template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Cols>
+template<typename T, int _Cols, int _Options> class ei_matrix_storage<T, Dynamic, Dynamic, _Cols, _Options>
 {
    T *m_data;
    int m_rows;
  public:
-    inline ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows) {}
-    inline ~ei_matrix_storage() { ei_aligned_free(m_data); }
+    inline explicit ei_matrix_storage() : m_data(0), m_rows(0) {}
+    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+    inline ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_new<T>(size)), m_rows(rows) {}
+    inline ~ei_matrix_storage() { ei_aligned_delete(m_data, _Cols*m_rows); }
    inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
    inline int rows(void) const {return m_rows;}
    inline static int cols(void) {return _Cols;}
@@ -181,8 +249,11 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
    {
      if(size != m_rows*_Cols)
      {
-        ei_aligned_free(m_data);
-        m_data = ei_aligned_malloc<T>(size);
+        ei_aligned_delete(m_data, _Cols*m_rows);
+        if (size)
+          m_data = ei_aligned_new<T>(size);
+        else
+          m_data = 0;
      }
      m_rows = rows;
    }
--- a/Eigen/src/Core/Minor.h
+++ b/Eigen/src/Core/Minor.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -25,7 +25,8 @@
 #ifndef EIGEN_MINOR_H
 #define EIGEN_MINOR_H

-/** \class Minor
+/** \nonstableyet 
+  * \class Minor
  *
  * \brief Expression of a minor
  *
@@ -92,7 +93,8 @@ template<typename MatrixType> class Minor
    const int m_row, m_col;
 };

-/** \return an expression of the (\a row, \a col)-minor of *this,
+/** \nonstableyet 
+  * \return an expression of the (\a row, \a col)-minor of *this,
  * i.e. an expression constructed from *this by removing the specified
  * row and column.
  *
@@ -108,7 +110,8 @@ MatrixBase<Derived>::minor(int row, int col)
  return Minor<Derived>(derived(), row, col);
 }

-/** This is the const version of minor(). */
+/** \nonstableyet 
+  * This is the const version of minor(). */
 template<typename Derived>
 inline const Minor<Derived>
 MatrixBase<Derived>::minor(int row, int col) const
--- a/Eigen/src/Core/NestByValue.h
+++ b/Eigen/src/Core/NestByValue.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -38,18 +38,8 @@
  * \sa MatrixBase::nestByValue()
  */
 template<typename ExpressionType>
-struct ei_traits<NestByValue<ExpressionType> >
-{
-  typedef typename ExpressionType::Scalar Scalar;
-  enum {
-    RowsAtCompileTime = ExpressionType::RowsAtCompileTime,
-    ColsAtCompileTime = ExpressionType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = ExpressionType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = ExpressionType::MaxColsAtCompileTime,
-    Flags = ExpressionType::Flags,
-    CoeffReadCost = ExpressionType::CoeffReadCost
-  };
-};
+struct ei_traits<NestByValue<ExpressionType> > : public ei_traits<ExpressionType>
+{};

 template<typename ExpressionType> class NestByValue
  : public MatrixBase<NestByValue<ExpressionType> >
@@ -64,7 +54,7 @@ template<typename ExpressionType> class NestByValue
    inline int cols() const { return m_expression.cols(); }
    inline int stride() const { return m_expression.stride(); }

-    inline const Scalar coeff(int row, int col) const
+    inline const CoeffReturnType coeff(int row, int col) const
    {
      return m_expression.coeff(row, col);
    }
@@ -74,7 +64,7 @@ template<typename ExpressionType> class NestByValue
      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    inline const Scalar coeff(int index) const
+    inline const CoeffReturnType coeff(int index) const
    {
      return m_expression.coeff(index);
    }
@@ -107,6 +97,8 @@ template<typename ExpressionType> class NestByValue
    {
      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
    }
+    
+    operator const ExpressionType&() const { return m_expression; }

  protected:
    const ExpressionType m_expression;
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
--- a/Eigen/src/Core/Part.h
+++ b/Eigen/src/Core/Part.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -26,13 +26,14 @@
 #ifndef EIGEN_PART_H
 #define EIGEN_PART_H

-/** \class Part
+/** \nonstableyet
+  * \class Part
  *
  * \brief Expression of a triangular matrix extracted from a given matrix
  *
  * \param MatrixType the type of the object in which we are taking the triangular part
-  * \param Mode the kind of triangular matrix expression to construct. Can be Upper, StrictlyUpper,
-  *             UnitUpper, Lower, StrictlyLower, UnitLower. This is in fact a bit field; it must have either
+  * \param Mode the kind of triangular matrix expression to construct. Can be UpperTriangular, StrictlyUpperTriangular,
+  *             UnitUpperTriangular, LowerTriangular, StrictlyLowerTriangular, UnitLowerTriangular. This is in fact a bit field; it must have either
  *             UpperTriangularBit or LowerTriangularBit, and additionnaly it may have either ZeroDiagBit or
  *             UnitDiagBit.
  *
@@ -43,16 +44,11 @@
  * \sa MatrixBase::part()
  */
 template<typename MatrixType, unsigned int Mode>
-struct ei_traits<Part<MatrixType, Mode> >
+struct ei_traits<Part<MatrixType, Mode> > : ei_traits<MatrixType>
 {
-  typedef typename MatrixType::Scalar Scalar;
  typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
    Flags = (_MatrixTypeNested::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
  };
@@ -88,8 +84,10 @@ template<typename MatrixType, unsigned int Mode> class Part

    inline Scalar coeff(int row, int col) const
    {
+      // SelfAdjointBit doesn't play any role here: just because a matrix is selfadjoint doesn't say anything about
+      // each individual coefficient, except for the not-very-useful-here fact that diagonal coefficients are real.
      if( ((Flags & LowerTriangularBit) && (col>row)) || ((Flags & UpperTriangularBit) && (row>col)) )
-        return (Flags & SelfAdjointBit) ? ei_conj(m_matrix.coeff(col, row)) : (Scalar)0;
+        return (Scalar)0;
      if(Flags & UnitDiagBit)
        return col==row ? (Scalar)1 : m_matrix.coeff(row, col);
      else if(Flags & ZeroDiagBit)
@@ -100,12 +98,12 @@ template<typename MatrixType, unsigned int Mode> class Part

    inline Scalar& coeffRef(int row, int col)
    {
-      EIGEN_STATIC_ASSERT(!(Flags & UnitDiagBit), writting_to_triangular_part_with_unit_diag_is_not_supported);
-      EIGEN_STATIC_ASSERT(!(Flags & SelfAdjointBit), default_writting_to_selfadjoint_not_supported);
-      ei_assert(   (Mode==Upper && col>=row)
-                || (Mode==Lower && col<=row)
-                || (Mode==StrictlyUpper && col>row)
-                || (Mode==StrictlyLower && col<row));
+      EIGEN_STATIC_ASSERT(!(Flags & UnitDiagBit), WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED)
+      EIGEN_STATIC_ASSERT(!(Flags & SelfAdjointBit), COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED)
+      ei_assert(   (Mode==UpperTriangular && col>=row)
+                || (Mode==LowerTriangular && col<=row)
+                || (Mode==StrictlyUpperTriangular && col>row)
+                || (Mode==StrictlyLowerTriangular && col<row));
      return m_matrix.const_cast_derived().coeffRef(row, col);
    }

@@ -119,15 +117,22 @@ template<typename MatrixType, unsigned int Mode> class Part
    const Block<Part, RowsAtCompileTime, 1> col(int i) { return Base::col(i); }
    const Block<Part, RowsAtCompileTime, 1> col(int i) const { return Base::col(i); }

+    template<typename OtherDerived>
+    void swap(const MatrixBase<OtherDerived>& other)
+    {
+      Part<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
+    }
+
  protected:

    const typename MatrixType::Nested m_matrix;
 };

-/** \returns an expression of a triangular matrix extracted from the current matrix
+/** \nonstableyet
+  * \returns an expression of a triangular matrix extracted from the current matrix
  *
-  * The parameter \a Mode can have the following values: \c Upper, \c StrictlyUpper, \c UnitUpper,
-  * \c Lower, \c StrictlyLower, \c UnitLower.
+  * The parameter \a Mode can have the following values: \c UpperTriangular, \c StrictlyUpperTriangular, \c UnitUpperTriangular,
+  * \c LowerTriangular, \c StrictlyLowerTriangular, \c UnitLowerTriangular.
  *
  * \addexample PartExample \label How to extract a triangular part of an arbitrary matrix
  *
@@ -149,7 +154,7 @@ inline Part<MatrixType, Mode>& Part<MatrixType, Mode>::operator=(const Other& ot
 {
  if(Other::Flags & EvalBeforeAssigningBit)
  {
-    typename ei_eval<Other>::type other_evaluated(other.rows(), other.cols());
+    typename MatrixBase<Other>::PlainMatrixType other_evaluated(other.rows(), other.cols());
    other_evaluated.template part<Mode>().lazyAssign(other);
    lazyAssign(other_evaluated);
  }
@@ -179,12 +184,12 @@ struct ei_part_assignment_impl
    }
    else
    {
-      ei_assert(Mode == Upper || Mode == Lower || Mode == StrictlyUpper || Mode == StrictlyLower);
-      if((Mode == Upper && row <= col)
-      || (Mode == Lower && row >= col)
-      || (Mode == StrictlyUpper && row < col)
-      || (Mode == StrictlyLower && row > col))
-        dst.coeffRef(row, col) = src.coeff(row, col);
+      ei_assert(Mode == UpperTriangular || Mode == LowerTriangular || Mode == StrictlyUpperTriangular || Mode == StrictlyLowerTriangular);
+      if((Mode == UpperTriangular && row <= col)
+      || (Mode == LowerTriangular && row >= col)
+      || (Mode == StrictlyUpperTriangular && row < col)
+      || (Mode == StrictlyLowerTriangular && row > col))
+        dst.copyCoeff(row, col, src);
    }
  }
 };
@@ -195,7 +200,7 @@ struct ei_part_assignment_impl<Derived1, Derived2, Mode, 1>
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    if(!(Mode & ZeroDiagBit))
-      dst.coeffRef(0, 0) = src.coeff(0, 0);
+      dst.copyCoeff(0, 0, src);
  }
 };

@@ -207,45 +212,45 @@ struct ei_part_assignment_impl<Derived1, Derived2, Mode, 0>
 };

 template<typename Derived1, typename Derived2>
-struct ei_part_assignment_impl<Derived1, Derived2, Upper, Dynamic>
+struct ei_part_assignment_impl<Derived1, Derived2, UpperTriangular, Dynamic>
 {
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
-    for(int j = 0; j < dst.cols(); j++)
-      for(int i = 0; i <= j; i++)
-        dst.coeffRef(i, j) = src.coeff(i, j);
+    for(int j = 0; j < dst.cols(); ++j)
+      for(int i = 0; i <= j; ++i)
+        dst.copyCoeff(i, j, src);
  }
 };

 template<typename Derived1, typename Derived2>
-struct ei_part_assignment_impl<Derived1, Derived2, Lower, Dynamic>
+struct ei_part_assignment_impl<Derived1, Derived2, LowerTriangular, Dynamic>
 {
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
-    for(int j = 0; j < dst.cols(); j++)
-      for(int i = j; i < dst.rows(); i++)
-        dst.coeffRef(i, j) = src.coeff(i, j);
+    for(int j = 0; j < dst.cols(); ++j)
+      for(int i = j; i < dst.rows(); ++i)
+        dst.copyCoeff(i, j, src);
  }
 };

 template<typename Derived1, typename Derived2>
-struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpper, Dynamic>
+struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpperTriangular, Dynamic>
 {
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
-    for(int j = 0; j < dst.cols(); j++)
-      for(int i = 0; i < j; i++)
-        dst.coeffRef(i, j) = src.coeff(i, j);
+    for(int j = 0; j < dst.cols(); ++j)
+      for(int i = 0; i < j; ++i)
+        dst.copyCoeff(i, j, src);
  }
 };
 template<typename Derived1, typename Derived2>
-struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLower, Dynamic>
+struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLowerTriangular, Dynamic>
 {
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
-    for(int j = 0; j < dst.cols(); j++)
-      for(int i = j+1; i < dst.rows(); i++)
-        dst.coeffRef(i, j) = src.coeff(i, j);
+    for(int j = 0; j < dst.cols(); ++j)
+      for(int i = j+1; i < dst.rows(); ++i)
+        dst.copyCoeff(i, j, src);
  }
 };
 template<typename Derived1, typename Derived2>
@@ -253,9 +258,9 @@ struct ei_part_assignment_impl<Derived1, Derived2, SelfAdjoint, Dynamic>
 {
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
-    for(int j = 0; j < dst.cols(); j++)
+    for(int j = 0; j < dst.cols(); ++j)
    {
-      for(int i = 0; i < j; i++)
+      for(int i = 0; i < j; ++i)
        dst.coeffRef(j, i) = ei_conj(dst.coeffRef(i, j) = src.coeff(i, j));
      dst.coeffRef(j, j) = ei_real(src.coeff(j, j));
    }
@@ -275,10 +280,11 @@ void Part<MatrixType, Mode>::lazyAssign(const Other& other)
    >::run(m_matrix.const_cast_derived(), other.derived());
 }

-/** \returns a lvalue pseudo-expression allowing to perform special operations on \c *this.
+/** \nonstableyet
+  * \returns an lvalue pseudo-expression allowing special operations to be performed on \c *this.
  *
-  * The \a Mode parameter can have the following values: \c Upper, \c StrictlyUpper, \c Lower,
-  * \c StrictlyLower, \c SelfAdjoint.
+  * The \a Mode parameter can have the following values: \c UpperTriangular, \c StrictlyUpperTriangular, \c LowerTriangular,
+  * \c StrictlyLowerTriangular, \c SelfAdjoint.
  *
  * \addexample PartExample \label How to write to a triangular part of a matrix
  *
@@ -297,44 +303,44 @@ inline Part<Derived, Mode> MatrixBase<Derived>::part()
 /** \returns true if *this is approximately equal to an upper triangular matrix,
  *          within the precision given by \a prec.
  *
-  * \sa isLower(), extract(), part(), marked()
+  * \sa isLowerTriangular(), extract(), part(), marked()
  */
 template<typename Derived>
-bool MatrixBase<Derived>::isUpper(RealScalar prec) const
+bool MatrixBase<Derived>::isUpperTriangular(RealScalar prec) const
 {
  if(cols() != rows()) return false;
-  RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
-  for(int j = 0; j < cols(); j++)
-    for(int i = 0; i <= j; i++)
+  RealScalar maxAbsOnUpperTriangularPart = static_cast<RealScalar>(-1);
+  for(int j = 0; j < cols(); ++j)
+    for(int i = 0; i <= j; ++i)
    {
      RealScalar absValue = ei_abs(coeff(i,j));
-      if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
+      if(absValue > maxAbsOnUpperTriangularPart) maxAbsOnUpperTriangularPart = absValue;
    }
-  for(int j = 0; j < cols()-1; j++)
-    for(int i = j+1; i < rows(); i++)
-      if(!ei_isMuchSmallerThan(coeff(i, j), maxAbsOnUpperPart, prec)) return false;
+  for(int j = 0; j < cols()-1; ++j)
+    for(int i = j+1; i < rows(); ++i)
+      if(!ei_isMuchSmallerThan(coeff(i, j), maxAbsOnUpperTriangularPart, prec)) return false;
  return true;
 }

 /** \returns true if *this is approximately equal to a lower triangular matrix,
  *          within the precision given by \a prec.
  *
-  * \sa isUpper(), extract(), part(), marked()
+  * \sa isUpperTriangular(), extract(), part(), marked()
  */
 template<typename Derived>
-bool MatrixBase<Derived>::isLower(RealScalar prec) const
+bool MatrixBase<Derived>::isLowerTriangular(RealScalar prec) const
 {
  if(cols() != rows()) return false;
-  RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
-  for(int j = 0; j < cols(); j++)
-    for(int i = j; i < rows(); i++)
+  RealScalar maxAbsOnLowerTriangularPart = static_cast<RealScalar>(-1);
+  for(int j = 0; j < cols(); ++j)
+    for(int i = j; i < rows(); ++i)
    {
      RealScalar absValue = ei_abs(coeff(i,j));
-      if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
+      if(absValue > maxAbsOnLowerTriangularPart) maxAbsOnLowerTriangularPart = absValue;
    }
-  for(int j = 1; j < cols(); j++)
-    for(int i = 0; i < j; i++)
-      if(!ei_isMuchSmallerThan(coeff(i, j), maxAbsOnLowerPart, prec)) return false;
+  for(int j = 1; j < cols(); ++j)
+    for(int i = 0; i < j; ++i)
+      if(!ei_isMuchSmallerThan(coeff(i, j), maxAbsOnLowerTriangularPart, prec)) return false;
  return true;
 }

--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -30,14 +30,12 @@
 *** Forward declarations ***
 ***************************/

-template<int VectorizationMode, int Index, typename Lhs, typename Rhs>
+template<int VectorizationMode, int Index, typename Lhs, typename Rhs, typename RetScalar>
 struct ei_product_coeff_impl;

 template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl;

-template<typename T> struct ei_product_eval_to_column_major;
-
 /** \class ProductReturnType
  *
  * \brief Helper class to get the correct and optimized returned type of operator*
@@ -64,13 +62,14 @@ struct ProductReturnType
 };

 // cache friendly specialization
+// note that there is a DiagonalProduct specialization in DiagonalProduct.h
 template<typename Lhs, typename Rhs>
 struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
 {
  typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;

  typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime,
-                             typename ei_product_eval_to_column_major<Rhs>::type
+                             typename ei_plain_matrix_type_column_major<Rhs>::type
                   >::type RhsNested;

  typedef Product<LhsNested, RhsNested, CacheFriendlyProduct> Type;
@@ -79,21 +78,20 @@ struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
 /*  Helper class to determine the type of the product, can be either:
 *    - NormalProduct
 *    - CacheFriendlyProduct
- *    - NormalProduct
+ *    - DiagonalProduct
 */
 template<typename Lhs, typename Rhs> struct ei_product_mode
 {
  enum{

-    value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal)
+    value = ei_is_diagonal<Rhs>::ret || ei_is_diagonal<Lhs>::ret
          ? DiagonalProduct
-          : (Rhs::Flags & Lhs::Flags & SparseBit)
-          ? SparseProduct
-          : Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
-            && ( Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
-              || Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD )
+          : Lhs::MaxColsAtCompileTime == Dynamic
+            && ( Lhs::MaxRowsAtCompileTime == Dynamic
+              || Rhs::MaxColsAtCompileTime == Dynamic )
            && (!(Rhs::IsVectorAtCompileTime && (Lhs::Flags&RowMajorBit)  && (!(Lhs::Flags&DirectAccessBit))))
            && (!(Lhs::IsVectorAtCompileTime && (!(Rhs::Flags&RowMajorBit)) && (!(Rhs::Flags&DirectAccessBit))))
+            && (ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret)
          ? CacheFriendlyProduct
          : NormalProduct };
 };
@@ -118,9 +116,9 @@ template<typename LhsNested, typename RhsNested, int ProductMode>
 struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
 {
  // clean the nested types:
-  typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
-  typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
-  typedef typename _LhsNested::Scalar Scalar;
+  typedef typename ei_cleantype<LhsNested>::type _LhsNested;
+  typedef typename ei_cleantype<RhsNested>::type _RhsNested;
+  typedef typename ei_scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;

  enum {
    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
@@ -151,7 +149,8 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
          | EvalBeforeAssigningBit
          | EvalBeforeNestingBit
-          | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0),
+          | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0)
+          | (LhsFlags & RhsFlags & AlignedBit),

    CoeffReadCost = InnerSize == Dynamic ? Dynamic
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
@@ -188,7 +187,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product

    typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization,
                                  Unroll ? InnerSize-1 : Dynamic,
-                                  _LhsNested, _RhsNested> ScalarCoeffImpl;
+                                  _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;

  public:

@@ -196,7 +195,13 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
    inline Product(const Lhs& lhs, const Rhs& rhs)
      : m_lhs(lhs), m_rhs(rhs)
    {
-      ei_assert(lhs.cols() == rhs.rows());
+      // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
+      // We still allow mixing T and complex<T>.
+      EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret),
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+      ei_assert(lhs.cols() == rhs.rows()
+        && "invalid matrix product"
+        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }

    /** \internal
@@ -208,17 +213,17 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
    /** \internal
      * \returns whether it is worth it to use the cache friendly product.
      */
-    inline bool _useCacheFriendlyProduct() const
+    EIGEN_STRONG_INLINE bool _useCacheFriendlyProduct() const
    {
      return  m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
              && (  rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
                 || cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD);
    }

-    inline int rows() const { return m_lhs.rows(); }
-    inline int cols() const { return m_rhs.cols(); }
+    EIGEN_STRONG_INLINE int rows() const { return m_lhs.rows(); }
+    EIGEN_STRONG_INLINE int cols() const { return m_rhs.cols(); }

-    const Scalar coeff(int row, int col) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int row, int col) const
    {
      Scalar res;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
@@ -228,7 +233,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
    /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
     * which is why we don't set the LinearAccessBit.
     */
-    const Scalar coeff(int index) const
+    EIGEN_STRONG_INLINE const Scalar coeff(int index) const
    {
      Scalar res;
      const int row = RowsAtCompileTime == 1 ? 0 : index;
@@ -238,7 +243,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
    }

    template<int LoadMode>
-    const PacketScalar packet(int row, int col) const
+    EIGEN_STRONG_INLINE const PacketScalar packet(int row, int col) const
    {
      PacketScalar res;
      ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
@@ -248,8 +253,8 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
      return res;
    }

-    inline const _LhsNested& lhs() const { return m_lhs; }
-    inline const _RhsNested& rhs() const { return m_rhs; }
+    EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
+    EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }

  protected:
    const LhsNested m_lhs;
@@ -267,6 +272,21 @@ template<typename OtherDerived>
 inline const typename ProductReturnType<Derived,OtherDerived>::Type
 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 {
+  enum {
+    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
+                   || OtherDerived::RowsAtCompileTime==Dynamic
+                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwise()*v2
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
 }

@@ -290,41 +310,42 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
 *** Scalar path  - no vectorization ***
 **************************************/

-template<int Index, typename Lhs, typename Rhs>
-struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs>
+template<int Index, typename Lhs, typename Rhs, typename RetScalar>
+struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs, RetScalar>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
-    ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs>::run(row, col, lhs, rhs, res);
+    ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
    res += lhs.coeff(row, Index) * rhs.coeff(Index, col);
  }
 };

-template<typename Lhs, typename Rhs>
-struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs>
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
  }
 };

-template<typename Lhs, typename Rhs>
-struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs>
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
  {
+    ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
-      for(int i = 1; i < lhs.cols(); i++)
+      for(int i = 1; i < lhs.cols(); ++i)
        res += lhs.coeff(row, i) * rhs.coeff(i, col);
  }
 };

 // prevent buggy user code from causing an infinite recursion
-template<typename Lhs, typename Rhs>
-struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs>
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs, RetScalar>
 {
-  inline static void run(int, int, const Lhs&, const Rhs&, typename Lhs::Scalar&) {}
+  EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {}
 };

 /*******************************************
@@ -335,7 +356,7 @@ template<int Index, typename Lhs, typename Rhs, typename PacketScalar>
 struct ei_product_coeff_vectorized_unroller
 {
  enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    ei_product_coeff_vectorized_unroller<Index-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
    pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, Index) , rhs.template packet<Aligned>(Index, col) ));
@@ -345,22 +366,22 @@ struct ei_product_coeff_vectorized_unroller
 template<typename Lhs, typename Rhs, typename PacketScalar>
 struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
  }
 };

-template<int Index, typename Lhs, typename Rhs>
-struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs>
+template<int Index, typename Lhs, typename Rhs, typename RetScalar>
+struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar>
 {
  typedef typename Lhs::PacketScalar PacketScalar;
  enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size };
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    PacketScalar pres;
    ei_product_coeff_vectorized_unroller<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres);
-    ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs>::run(row, col, lhs, rhs, res);
+    ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
    res = ei_predux(pres);
  }
 };
@@ -368,7 +389,7 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs>
 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
 struct ei_product_coeff_vectorized_dyn_selector
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = ei_dot_impl<
      Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
@@ -382,7 +403,7 @@ struct ei_product_coeff_vectorized_dyn_selector
 template<typename Lhs, typename Rhs, int RhsCols>
 struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
 {
-  inline static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = ei_dot_impl<
      Lhs,
@@ -394,7 +415,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
 template<typename Lhs, typename Rhs, int LhsRows>
 struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
 {
-  inline static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = ei_dot_impl<
      Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>,
@@ -406,7 +427,7 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
 template<typename Lhs, typename Rhs>
 struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
 {
-  inline static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = ei_dot_impl<
      Lhs,
@@ -415,10 +436,10 @@ struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
  }
 };

-template<typename Lhs, typename Rhs>
-struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs>
+template<typename Lhs, typename Rhs, typename RetScalar>
+struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs, RetScalar>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
  }
@@ -431,7 +452,7 @@ struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs>
 template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl<RowMajor, Index, Lhs, Rhs, PacketScalar, LoadMode>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
  {
    ei_product_packet_impl<RowMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
    res =  ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<LoadMode>(Index, col), res);
@@ -441,7 +462,7 @@ struct ei_product_packet_impl<RowMajor, Index, Lhs, Rhs, PacketScalar, LoadMode>
 template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl<ColMajor, Index, Lhs, Rhs, PacketScalar, LoadMode>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
  {
    ei_product_packet_impl<ColMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res);
    res =  ei_pmadd(lhs.template packet<LoadMode>(row, Index), ei_pset1(rhs.coeff(Index, col)), res);
@@ -451,7 +472,7 @@ struct ei_product_packet_impl<ColMajor, Index, Lhs, Rhs, PacketScalar, LoadMode>
 template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
  {
    res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
  }
@@ -460,7 +481,7 @@ struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
 template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res)
  {
    res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col)));
  }
@@ -469,10 +490,11 @@ struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode>
 template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
  {
+    ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
-      for(int i = 1; i < lhs.cols(); i++)
+      for(int i = 1; i < lhs.cols(); ++i)
        res =  ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
  }
 };
@@ -480,10 +502,11 @@ struct ei_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMod
 template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode>
 {
-  inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
+  EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res)
  {
+    ei_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col)));
-      for(int i = 1; i < lhs.cols(); i++)
+      for(int i = 1; i < lhs.cols(); ++i)
        res =  ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1(rhs.coeff(i, col)), res);
  }
 };
@@ -547,7 +570,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
       _res = &res.coeffRef(0);
    else
    {
-      _res = ei_alloc_stack(Scalar,res.size());
+      _res = ei_aligned_stack_new(Scalar,res.size());
      Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
    }
    ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -557,7 +580,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
    if (!EvalToRes)
    {
      res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_delete(Scalar, _res, res.size());
    }
  }
 };
@@ -593,7 +616,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _res = &res.coeffRef(0);
    else
    {
-      _res = ei_alloc_stack(Scalar, res.size());
+      _res = ei_aligned_stack_new(Scalar, res.size());
      Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res;
    }
    ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -603,7 +626,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
    if (!EvalToRes)
    {
      res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_delete(Scalar, _res, res.size());
    }
  }
 };
@@ -626,13 +649,13 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
       _rhs = &product.rhs().const_cast_derived().coeffRef(0);
    else
    {
-      _rhs = ei_alloc_stack(Scalar, product.rhs().size());
+      _rhs = ei_aligned_stack_new(Scalar, product.rhs().size());
      Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs();
    }
    ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
                                                    _rhs, product.rhs().size(), res);

-    if (!UseRhsDirectly) ei_free_stack(_rhs, Scalar, product.rhs().size());
+    if (!UseRhsDirectly) ei_aligned_stack_delete(Scalar, _rhs, product.rhs().size());
  }
 };

@@ -654,13 +677,13 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _lhs = &product.lhs().const_cast_derived().coeffRef(0);
    else
    {
-      _lhs = ei_alloc_stack(Scalar, product.lhs().size());
+      _lhs = ei_aligned_stack_new(Scalar, product.lhs().size());
      Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs();
    }
    ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
                                                    _lhs, product.lhs().size(), res);

-    if(!UseLhsDirectly) ei_free_stack(_lhs, Scalar, product.lhs().size());
+    if(!UseLhsDirectly) ei_aligned_stack_delete(Scalar, _lhs, product.lhs().size());
  }
 };

@@ -706,23 +729,12 @@ inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFrien
  return derived();
 }

-template<typename T> struct ei_product_eval_to_column_major
-{
-  typedef Matrix<typename ei_traits<T>::Scalar,
-                ei_traits<T>::RowsAtCompileTime,
-                ei_traits<T>::ColsAtCompileTime,
-                ColMajor,
-                ei_traits<T>::MaxRowsAtCompileTime,
-                ei_traits<T>::MaxColsAtCompileTime
-          > type;
-};
-
 template<typename T> struct ei_product_copy_rhs
 {
  typedef typename ei_meta_if<
         (ei_traits<T>::Flags & RowMajorBit)
      || (!(ei_traits<T>::Flags & DirectAccessBit)),
-      typename ei_product_eval_to_column_major<T>::type,
+      typename ei_plain_matrix_type_column_major<T>::type,
      const T&
    >::ret type;
 };
@@ -731,7 +743,7 @@ template<typename T> struct ei_product_copy_lhs
 {
  typedef typename ei_meta_if<
      (!(int(ei_traits<T>::Flags) & DirectAccessBit)),
-      typename ei_eval<T>::type,
+      typename ei_plain_matrix_type<T>::type,
      const T&
    >::ret type;
 };
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,56 +26,274 @@
 #ifndef EIGEN_REDUX_H
 #define EIGEN_REDUX_H

-template<typename BinaryOp, typename Derived, int Start, int Length>
-struct ei_redux_impl
+// TODO
+//  * implement other kinds of vectorization
+//  * factorize code
+
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for vectorization and unrolling
+***************************************************************************/
+
+template<typename Func, typename Derived>
+struct ei_redux_traits // compile-time selection of the Vectorization and Unrolling strategies used to reduce Derived with Func
+{
+private:
+  enum {
+    PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
+    InnerMaxSize = int(Derived::Flags)&RowMajorBit // max size along the inner dimension: columns if row-major, rows otherwise
+                 ? Derived::MaxColsAtCompileTime
+                 : Derived::MaxRowsAtCompileTime
+  };
+  // vectorization requires packet access on the expression AND packet support in the functor
+  enum {
+    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
+                  && (ei_functor_traits<Func>::PacketAccess),
+    MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
+    MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize // inner dimension must be able to hold at least 3 packets
+  };
+
+public:
+  enum {
+    Vectorization = int(MayLinearVectorize) ? int(LinearVectorization) // linear is preferred, then slice, otherwise none
+                  : int(MaySliceVectorize)  ? int(SliceVectorization)
+                                            : int(NoVectorization)
+  };
+  
+private:
+  enum {
+    Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost // one coefficient read per entry ...
+           + (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost, // ... plus (size-1) ops, priced as AddCost whatever Func is
+    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)) // limit is scaled by PacketSize when vectorizing
+  };
+
+public:
+  enum {
+    Unrolling = Cost <= UnrollingLimit // unroll completely only when the estimated cost fits within the limit
+              ? CompleteUnrolling
+              : NoUnrolling
+  };
+};
+
+/***************************************************************************
+* Part 2 : unrollers
+***************************************************************************/
+
+/*** no vectorization ***/
+
+template<typename Func, typename Derived, int Start, int Length>
+struct ei_redux_novec_unroller
 {
  enum {
    HalfLength = Length/2
  };

-  typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
+  typedef typename Derived::Scalar Scalar;

-  static Scalar run(const Derived &mat, const BinaryOp& func)
+  EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
  {
-    return func(
-      ei_redux_impl<BinaryOp, Derived, Start, HalfLength>::run(mat, func),
-      ei_redux_impl<BinaryOp, Derived, Start+HalfLength, Length - HalfLength>::run(mat, func));
+    return func(ei_redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+                ei_redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
  }
 };

-template<typename BinaryOp, typename Derived, int Start>
-struct ei_redux_impl<BinaryOp, Derived, Start, 1>
+template<typename Func, typename Derived, int Start>
+struct ei_redux_novec_unroller<Func, Derived, Start, 1>
 {
  enum {
    col = Start / Derived::RowsAtCompileTime,
    row = Start % Derived::RowsAtCompileTime
  };

-  typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
+  typedef typename Derived::Scalar Scalar;

-  static Scalar run(const Derived &mat, const BinaryOp &)
+  EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeff(row, col);
  }
 };

-template<typename BinaryOp, typename Derived, int Start>
-struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
+/*** vectorization ***/
+  
+template<typename Func, typename Derived, int Start, int Length>
+struct ei_redux_vec_unroller
 {
-  typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
-  static Scalar run(const Derived& mat, const BinaryOp& func)
+  enum {
+    PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
+    HalfLength = Length/2
+  };
+
+  typedef typename Derived::Scalar Scalar;
+  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+
+  EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
  {
+    return func.packetOp(
+            ei_redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+            ei_redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
+  }
+};
+
+template<typename Func, typename Derived, int Start>
+struct ei_redux_vec_unroller<Func, Derived, Start, 1> // end of the recursion (Length==1): loads the Start-th packet of mat
+{
+  enum {
+    index = Start * ei_packet_traits<typename Derived::Scalar>::size, // linear index of the packet's first coefficient
+    row = int(Derived::Flags)&RowMajorBit // (row,col) are derived from index following the storage order given by RowMajorBit
+        ? index / int(Derived::ColsAtCompileTime)
+        : index % Derived::RowsAtCompileTime,
+    col = int(Derived::Flags)&RowMajorBit
+        ? index % int(Derived::ColsAtCompileTime)
+        : index / Derived::RowsAtCompileTime,
+    alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned // aligned loads only when the expression carries AlignedBit
+  };
+
+  typedef typename Derived::Scalar Scalar;
+  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+
+  EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&) // the functor is unused at the leaf
+  {
+    return mat.template packet<alignment>(row, col);
+  }
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+template<typename Func, typename Derived,
+         int Vectorization = ei_redux_traits<Func, Derived>::Vectorization,
+         int Unrolling = ei_redux_traits<Func, Derived>::Unrolling
+>
+struct ei_redux_impl;
+
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
+{
+  typedef typename Derived::Scalar Scalar;
+  static Scalar run(const Derived& mat, const Func& func)
+  {
+    ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
    Scalar res;
-    res = mat.coeff(0,0);
-    for(int i = 1; i < mat.rows(); i++)
+    res = mat.coeff(0, 0);
+    for(int i = 1; i < mat.rows(); ++i)
      res = func(res, mat.coeff(i, 0));
-    for(int j = 1; j < mat.cols(); j++)
-      for(int i = 0; i < mat.rows(); i++)
+    for(int j = 1; j < mat.cols(); ++j)
+      for(int i = 0; i < mat.rows(); ++i)
        res = func(res, mat.coeff(i, j));
    return res;
  }
 };

+template<typename Func, typename Derived>
+struct ei_redux_impl<Func,Derived, NoVectorization, CompleteUnrolling>
+  : public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
+{};
+
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling> // runtime loop over linear indices: packets in the middle, scalars at both ends
+{
+  typedef typename Derived::Scalar Scalar;
+  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+
+  static Scalar run(const Derived& mat, const Func& func)
+  {
+    const int size = mat.size();
+    const int packetSize = ei_packet_traits<Scalar>::size;
+    const int alignedStart =  (Derived::Flags & AlignedBit) // index of the first coefficient processed with packets
+                           || !(Derived::Flags & DirectAccessBit)
+                           ? 0
+                           : ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
+    enum {
+      alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
+                ? Aligned : Unaligned
+    };
+    const int alignedSize = ((size-alignedStart)/packetSize)*packetSize; // number of coefficients covered by whole packets
+    const int alignedEnd = alignedStart + alignedSize;
+    Scalar res;
+    if(alignedSize)
+    {
+      PacketScalar packet_res = mat.template packet<alignment>(alignedStart); // seed with the first packet; the loop starts at the second
+      for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
+        packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
+      res = func.predux(packet_res); // fold the packet accumulator into a single scalar
+      // scalar pass over the coefficients located before the packet range
+      for(int index = 0; index < alignedStart; ++index)
+        res = func(res,mat.coeff(index));
+      // scalar pass over the coefficients located after the packet range
+      for(int index = alignedEnd; index < size; ++index)
+        res = func(res,mat.coeff(index));
+    }
+    else // too small to vectorize anything.
+         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
+    {
+      res = mat.coeff(0); // NOTE(review): read even when size==0; no emptiness assert here unlike the NoVectorization path - confirm
+      for(int index = 1; index < size; ++index)
+        res = func(res,mat.coeff(index));
+    }
+
+    return res;
+  }
+};
+
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling> // runtime loops: packets along each inner slice, scalars for each slice's tail
+{
+  typedef typename Derived::Scalar Scalar;
+  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+  // Reduces all coefficients of mat with func and returns the scalar result.
+  static Scalar run(const Derived& mat, const Func& func)
+  {
+    const int innerSize = mat.innerSize();
+    const int outerSize = mat.outerSize();
+    enum {
+      packetSize = ei_packet_traits<Scalar>::size,
+      isRowMajor = Derived::Flags&RowMajorBit?1:0
+    };
+    const int packetedInnerSize = ((innerSize)/packetSize)*packetSize; // part of each inner slice covered by whole packets
+    Scalar res;
+    if(packetedInnerSize)
+    {
+      PacketScalar packet_res = mat.template packet<Unaligned>(0,0); // seed the accumulator with the very first packet
+      for(int j=0; j<outerSize; ++j)
+        for(int i=(j==0?int(packetSize):0); i<packetedInnerSize; i+=int(packetSize)) // skip packet (0,0): it already seeded packet_res
+          packet_res = func.packetOp(packet_res, mat.template packet<Unaligned>
+                                                 (isRowMajor?j:i, isRowMajor?i:j));
+      // fold the packet accumulator into a scalar, then add the tail of every slice coefficient by coefficient
+      res = func.predux(packet_res);
+      for(int j=0; j<outerSize; ++j)
+        for(int i=packetedInnerSize; i<innerSize; ++i)
+          res = func(res, mat.coeff(isRowMajor?j:i, isRowMajor?i:j));
+    }
+    else // too small to vectorize anything.
+         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
+    {
+      res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func);
+    }
+
+    return res;
+  }
+};
+
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling> // fully unrolled: packet unroller first, scalar unroller for the rest
+{
+  typedef typename Derived::Scalar Scalar;
+  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+  enum {
+    PacketSize = ei_packet_traits<Scalar>::size,
+    Size = Derived::SizeAtCompileTime,
+    VectorizationSize = (Size / PacketSize) * PacketSize // number of leading coefficients covered by whole packets
+  };
+  EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
+  {
+    Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); // reduce Size/PacketSize packets, then fold to a scalar
+    if (VectorizationSize != Size) // leftover coefficients that do not fill a whole packet
+      res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizationSize, Size-VectorizationSize>::run(mat,func));
+    return res;
+  }
+};
+
+
 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a BinaryOp is the type of the functor \a func which must be
@@ -84,21 +302,20 @@ struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
  * \sa MatrixBase::sum(), MatrixBase::minCoeff(), MatrixBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
  */
 template<typename Derived>
-template<typename BinaryOp>
-typename ei_result_of<BinaryOp(typename ei_traits<Derived>::Scalar)>::type
-MatrixBase<Derived>::redux(const BinaryOp& func) const
+template<typename Func>
+inline typename ei_result_of<Func(typename ei_traits<Derived>::Scalar)>::type
+MatrixBase<Derived>::redux(const Func& func) const
 {
-  const bool unroll = SizeAtCompileTime * CoeffReadCost
-                    + (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
-                    <= EIGEN_UNROLLING_LIMIT;
-  return ei_redux_impl<BinaryOp, Derived, 0, unroll ? int(SizeAtCompileTime) : Dynamic>
-            ::run(derived(), func);
+  typename Derived::Nested nested(derived());
+  typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
+  return ei_redux_impl<Func, ThisNested>
+            ::run(nested, func);
 }

 /** \returns the minimum of all coefficients of *this
  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
 MatrixBase<Derived>::minCoeff() const
 {
  return this->redux(Eigen::ei_scalar_min_op<Scalar>());
@@ -107,10 +324,48 @@ MatrixBase<Derived>::minCoeff() const
 /** \returns the maximum of all coefficients of *this
  */
 template<typename Derived>
-inline typename ei_traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
 MatrixBase<Derived>::maxCoeff() const
 {
  return this->redux(Eigen::ei_scalar_max_op<Scalar>());
 }

+/** \returns the sum of all coefficients of *this
+  *
+  * \sa trace(), prod()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
+MatrixBase<Derived>::sum() const
+{
+  return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
+}
+
+/** \returns the product of all coefficients of *this
+  *
+  * Example: \include MatrixBase_prod.cpp
+  * Output: \verbinclude MatrixBase_prod.out
+  *
+  * \sa sum()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
+MatrixBase<Derived>::prod() const
+{
+  return this->redux(Eigen::ei_scalar_product_op<Scalar>());
+}
+
+/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
+  *
+  * \c *this can be any matrix, not necessarily square.
+  *
+  * \sa diagonal(), sum()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
+MatrixBase<Derived>::trace() const
+{
+  return diagonal().sum();
+}
+
 #endif // EIGEN_REDUX_H
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -0,0 +1,73 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_RETURNBYVALUE_H
+#define EIGEN_RETURNBYVALUE_H
+
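+/** \class ReturnByValue
+  * \brief Base class forwarding evalTo(dst) to \c Functor; MatrixBase::operator=(const ReturnByValue&) calls evalTo() on the destination.
+  */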
+template<typename Functor, typename _Scalar,int _Rows,int _Cols,int _Options,int _MaxRows,int _MaxCols>
+struct ei_traits<ReturnByValue<Functor,Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> > >
+  : public ei_traits<Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> >
+{
+  enum {
+    Flags = ei_traits<Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> >::Flags | EvalBeforeNestingBit
+  };
+};
+
+template<typename Functor,typename EvalTypeDerived,int n>
+struct ei_nested<ReturnByValue<Functor,MatrixBase<EvalTypeDerived> >, n, EvalTypeDerived>
+{
+  typedef EvalTypeDerived type;
+};
+
+template<typename Functor, typename EvalType> class ReturnByValue
+{
+  public:
+    template<typename Dest>
+    inline void evalTo(Dest& dst) const
+    { static_cast<const Functor*>(this)->evalTo(dst); }
+};
+
+template<typename Functor, typename _Scalar,int _Rows,int _Cols,int _Options,int _MaxRows,int _MaxCols>
+  class ReturnByValue<Functor,Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> >
+  : public MatrixBase<ReturnByValue<Functor,Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> > >
+{
+  public:
+    EIGEN_GENERIC_PUBLIC_INTERFACE(ReturnByValue)
+    template<typename Dest>
+    inline void evalTo(Dest& dst) const
+    { static_cast<const Functor* const>(this)->evalTo(dst); }
+};
+
+template<typename Derived>
+template<typename OtherDerived,typename OtherEvalType>
+Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived,OtherEvalType>& other)
+{
+  other.evalTo(derived());
+  return derived();
+}
+
+#endif // EIGEN_RETURNBYVALUE_H
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -30,11 +30,11 @@ template<typename XprType, unsigned int Mode> struct ei_is_part<Part<XprType,Mod

 template<typename Lhs, typename Rhs,
  int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit)
-                     ? Lower
+                     ? LowerTriangular
                     : (int(Lhs::Flags) & UpperTriangularBit)
-                     ? Upper
-                     : -1,
-  int StorageOrder = ei_is_part<Lhs>::value ? -1  // this is to solve ambiguous specializations
+                     ? UpperTriangular
+                     : 0xffffff,
+  int StorageOrder = ei_is_part<Lhs>::value ? 0xffffff  // this is to solve ambiguous specializations
                   : int(Lhs::Flags) & (RowMajorBit|SparseBit)
  >
 struct ei_solve_triangular_selector;
@@ -56,14 +56,14 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor|IsDense>
  typedef typename Rhs::Scalar Scalar;
  static void run(const Lhs& lhs, Rhs& other)
  {
-    const bool IsLower = (UpLo==Lower);
+    const bool IsLowerTriangular = (UpLo==LowerTriangular);
    const int size = lhs.cols();
    /* We perform the inverse product per block of 4 rows such that we perfectly match
     * our optimized matrix * vector product. blockyStart represents the number of rows
     * we have process first using the non-block version.
     */
    int blockyStart = (std::max(size-5,0)/4)*4;
-    if (IsLower)
+    if (IsLowerTriangular)
      blockyStart = size - blockyStart;
    else
      blockyStart -= 1;
@@ -72,15 +72,15 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor|IsDense>
      // process first rows using the non block version
      if(!(Lhs::Flags & UnitDiagBit))
      {
-        if (IsLower)
+        if (IsLowerTriangular)
          other.coeffRef(0,c) = other.coeff(0,c)/lhs.coeff(0, 0);
        else
          other.coeffRef(size-1,c) = other.coeff(size-1, c)/lhs.coeff(size-1, size-1);
      }
-      for(int i=(IsLower ? 1 : size-2); IsLower ? i<blockyStart : i>blockyStart; i += (IsLower ? 1 : -1) )
+      for(int i=(IsLowerTriangular ? 1 : size-2); IsLowerTriangular ? i<blockyStart : i>blockyStart; i += (IsLowerTriangular ? 1 : -1) )
      {
        Scalar tmp = other.coeff(i,c)
-          - (IsLower ? ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0)
+          - (IsLowerTriangular ? ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0)
                     : ((lhs.row(i).end(size-i-1)) * other.col(c).end(size-i-1)).coeff(0,0));
        if (Lhs::Flags & UnitDiagBit)
          other.coeffRef(i,c) = tmp;
@@ -88,39 +88,39 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor|IsDense>
          other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
      }

-      // now let process the remaining rows 4 at once
-      for(int i=blockyStart; IsLower ? i<size : i>0; )
+      // now let's process the remaining rows 4 at once
+      for(int i=blockyStart; IsLowerTriangular ? i<size : i>0; )
      {
        int startBlock = i;
-        int endBlock = startBlock + (IsLower ? 4 : -4);
-        
+        int endBlock = startBlock + (IsLowerTriangular ? 4 : -4);
+
        /* Process the i cols times 4 rows block, and keep the result in a temporary vector */
        // FIXME use fixed size block but take care to small fixed size matrices...
        Matrix<Scalar,Dynamic,1> btmp(4);
-        if (IsLower)
+        if (IsLowerTriangular)
          btmp = lhs.block(startBlock,0,4,i) * other.col(c).start(i);
        else
          btmp = lhs.block(i-3,i+1,4,size-1-i) * other.col(c).end(size-1-i);
-        
+
        /* Let's process the 4x4 sub-matrix as usual.
         * btmp stores the diagonal coefficients used to update the remaining part of the result.
         */
        {
-          Scalar tmp = other.coeff(startBlock,c)-btmp.coeff(IsLower?0:3);
+          Scalar tmp = other.coeff(startBlock,c)-btmp.coeff(IsLowerTriangular?0:3);
          if (Lhs::Flags & UnitDiagBit)
            other.coeffRef(i,c) = tmp;
          else
            other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
        }

-        i += IsLower ? 1 : -1;
-        for (;IsLower ? i<endBlock : i>endBlock; i += IsLower ? 1 : -1)
+        i += IsLowerTriangular ? 1 : -1;
+        for (;IsLowerTriangular ? i<endBlock : i>endBlock; i += IsLowerTriangular ? 1 : -1)
        {
-          int remainingSize = IsLower ? i-startBlock : startBlock-i;
+          int remainingSize = IsLowerTriangular ? i-startBlock : startBlock-i;
          Scalar tmp = other.coeff(i,c)
-            - btmp.coeff(IsLower ? remainingSize : 3-remainingSize)
-            - (   lhs.row(i).block(IsLower ? startBlock : i+1, remainingSize)
-              * other.col(c).block(IsLower ? startBlock : i+1, remainingSize)).coeff(0,0);
+            - btmp.coeff(IsLowerTriangular ? remainingSize : 3-remainingSize)
+            - (   lhs.row(i).segment(IsLowerTriangular ? startBlock : i+1, remainingSize)
+              * other.col(c).segment(IsLowerTriangular ? startBlock : i+1, remainingSize)).coeff(0,0);

          if (Lhs::Flags & UnitDiagBit)
            other.coeffRef(i,c) = tmp;
@@ -133,10 +133,10 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor|IsDense>
 };

 // Implements the following configurations:
-//  - inv(Lower,         ColMajor) * Column vector
-//  - inv(Lower,UnitDiag,ColMajor) * Column vector
-//  - inv(Upper,         ColMajor) * Column vector
-//  - inv(Upper,UnitDiag,ColMajor) * Column vector
+//  - inv(LowerTriangular,         ColMajor) * Column vector
+//  - inv(LowerTriangular,UnitDiag,ColMajor) * Column vector
+//  - inv(UpperTriangular,         ColMajor) * Column vector
+//  - inv(UpperTriangular,UnitDiag,ColMajor) * Column vector
 template<typename Lhs, typename Rhs, int UpLo>
 struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>
 {
@@ -146,7 +146,7 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>

  static void run(const Lhs& lhs, Rhs& other)
  {
-    static const bool IsLower = (UpLo==Lower);
+    static const bool IsLowerTriangular = (UpLo==LowerTriangular);
    const int size = lhs.cols();
    for(int c=0 ; c<other.cols() ; ++c)
    {
@@ -155,27 +155,27 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>
       * we can process using the block version.
       */
      int blockyEnd = (std::max(size-5,0)/4)*4;
-      if (!IsLower)
+      if (!IsLowerTriangular)
        blockyEnd = size-1 - blockyEnd;
-      for(int i=IsLower ? 0 : size-1; IsLower ? i<blockyEnd : i>blockyEnd;)
+      for(int i=IsLowerTriangular ? 0 : size-1; IsLowerTriangular ? i<blockyEnd : i>blockyEnd;)
      {
        /* Let's process the 4x4 sub-matrix as usual.
         * btmp stores the diagonal coefficients used to update the remaining part of the result.
         */
        int startBlock = i;
-        int endBlock = startBlock + (IsLower ? 4 : -4);
+        int endBlock = startBlock + (IsLowerTriangular ? 4 : -4);
        Matrix<Scalar,4,1> btmp;
-        for (;IsLower ? i<endBlock : i>endBlock;
-             i += IsLower ? 1 : -1)
+        for (;IsLowerTriangular ? i<endBlock : i>endBlock;
+             i += IsLowerTriangular ? 1 : -1)
        {
          if(!(Lhs::Flags & UnitDiagBit))
            other.coeffRef(i,c) /= lhs.coeff(i,i);
-          int remainingSize = IsLower ? endBlock-i-1 : i-endBlock-1;
+          int remainingSize = IsLowerTriangular ? endBlock-i-1 : i-endBlock-1;
          if (remainingSize>0)
-            other.col(c).block((IsLower ? i : endBlock) + 1, remainingSize) -=
+            other.col(c).segment((IsLowerTriangular ? i : endBlock) + 1, remainingSize) -=
                other.coeffRef(i,c)
-              * Block<Lhs,Dynamic,1>(lhs, (IsLower ? i : endBlock) + 1, i, remainingSize, 1);
-          btmp.coeffRef(IsLower ? i-startBlock : remainingSize) = -other.coeffRef(i,c);
+              * Block<Lhs,Dynamic,1>(lhs, (IsLowerTriangular ? i : endBlock) + 1, i, remainingSize, 1);
+          btmp.coeffRef(IsLowerTriangular ? i-startBlock : remainingSize) = -other.coeffRef(i,c);
        }

        /* Now we can efficiently update the remaining part of the result as a matrix * vector product.
@@ -187,15 +187,21 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>
        // FIXME this is cool but what about conjugate/adjoint expressions ? do we want to evaluate them ?
        // this is a more general problem though.
        ei_cache_friendly_product_colmajor_times_vector(
-          IsLower ? size-endBlock : endBlock+1,
-          &(lhs.const_cast_derived().coeffRef(IsLower ? endBlock : 0, IsLower ? startBlock : endBlock+1)),
+          IsLowerTriangular ? size-endBlock : endBlock+1,
+          &(lhs.const_cast_derived().coeffRef(IsLowerTriangular ? endBlock : 0, IsLowerTriangular ? startBlock : endBlock+1)),
          lhs.stride(),
-          btmp, &(other.coeffRef(IsLower ? endBlock : 0, c)));
+          btmp, &(other.coeffRef(IsLowerTriangular ? endBlock : 0, c)));
+// 				if (IsLowerTriangular)
+//           other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock)
+//                                           * other.col(c).block(startBlock,endBlock-startBlock)).lazy();
+// 				else
+//           other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock)
+//                                           * other.col(c).block(startBlock,endBlock-startBlock)).lazy();
      }

      /* Now we have to process the remaining part as usual */
      int i;
-      for(i=blockyEnd; IsLower ? i<size-1 : i>0; i += (IsLower ? 1 : -1) )
+      for(i=blockyEnd; IsLowerTriangular ? i<size-1 : i>0; i += (IsLowerTriangular ? 1 : -1) )
      {
        if(!(Lhs::Flags & UnitDiagBit))
          other.coeffRef(i,c) /= lhs.coeff(i,i);
@@ -203,7 +209,7 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>
        /* NOTE we cannot use lhs.col(i).end(size-i-1) because Part::coeffRef gets called by .col() to
         * get the address of the start of the row
         */
-        if(IsLower)
+        if(IsLowerTriangular)
          other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, i+1,i, size-i-1,1);
        else
          other.col(c).start(i) -= other.coeffRef(i,c) * Block<Lhs,Dynamic,1>(lhs, 0,i, i, 1);
@@ -215,22 +221,39 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>
 };

 /** "in-place" version of MatrixBase::solveTriangular() where the result is written in \a other
+  *
+  * \nonstableyet
+  *
+  * The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+  * This function will const_cast it, so constness isn't honored here.
  *
  * See MatrixBase:solveTriangular() for the details.
  */
 template<typename Derived>
 template<typename OtherDerived>
-void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other) const
+void MatrixBase<Derived>::solveTriangularInPlace(const MatrixBase<OtherDerived>& _other) const
 {
+  MatrixBase<OtherDerived>& other = _other.const_cast_derived();
  ei_assert(derived().cols() == derived().rows());
  ei_assert(derived().cols() == other.rows());
  ei_assert(!(Flags & ZeroDiagBit));
  ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));

-  ei_solve_triangular_selector<Derived, OtherDerived>::run(derived(), other.derived());
+  enum { copy = ei_traits<OtherDerived>::Flags & RowMajorBit };
+
+  typedef typename ei_meta_if<copy,
+    typename ei_plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::ret OtherCopy;
+  OtherCopy otherCopy(other.derived());
+
+  ei_solve_triangular_selector<Derived, typename ei_unref<OtherCopy>::type>::run(derived(), otherCopy);
+
+  if (copy)
+    other = otherCopy;
 }

 /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
+  *
+  * \nonstableyet
  *
  * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other.
  * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
@@ -240,17 +263,17 @@ void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other
  * It is required that \c *this be marked as either an upper or a lower triangular matrix, which
  * can be done by marked(), and that is automatically the case with expressions such as those returned
  * by extract().
-  * 
+  *
  * \addexample SolveTriangular \label How to solve a triangular system (aka. how to multiply the inverse of a triangular matrix by another one)
-  * 
+  *
  * Example: \include MatrixBase_marked.cpp
  * Output: \verbinclude MatrixBase_marked.out
-  * 
+  *
  * This function is essentially a wrapper to the faster solveTriangularInPlace() function creating
  * a temporary copy of \a other, calling solveTriangularInPlace() on the copy and returning it.
  * Therefore, if \a other is not needed anymore, it is quite faster to call solveTriangularInPlace()
  * instead of solveTriangular().
-  * 
+  *
  * For users coming from BLAS, this function (and more specifically solveTriangularInPlace()) offer
  * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
  *
@@ -258,14 +281,15 @@ void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other
  * \code
  * M * T^1  <=>  T.transpose().solveTriangularInPlace(M.transpose());
  * \endcode
-  * 
+  *
  * \sa solveTriangularInPlace(), marked(), extract()
  */
 template<typename Derived>
 template<typename OtherDerived>
-typename OtherDerived::Eval MatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const
+typename ei_plain_matrix_type_column_major<OtherDerived>::type
+MatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const
 {
-  typename OtherDerived::Eval res(other);
+  typename ei_plain_matrix_type_column_major<OtherDerived>::type res(other);
  solveTriangularInPlace(res);
  return res;
 }
--- a/Eigen/src/Core/Sum.h
+++ b/Eigen/src/Core/Sum.h
@@ -1,269 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
-//
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
-//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
-
-#ifndef EIGEN_SUM_H
-#define EIGEN_SUM_H
-
-/***************************************************************************
-* Part 1 : the logic deciding a strategy for vectorization and unrolling
-***************************************************************************/
-
-template<typename Derived>
-struct ei_sum_traits
-{
-private:
-  enum {
-    PacketSize = ei_packet_traits<typename Derived::Scalar>::size
-  };
-
-public:
-  enum {
-    Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
-                 && (int(Derived::Flags)&LinearAccessBit)
-                 && (int(Derived::SizeAtCompileTime)>2*PacketSize)
-                  ? LinearVectorization
-                  : NoVectorization
-  };
-
-private:
-  enum {
-    Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
-           + (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
-    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
-  };
-
-public:
-  enum {
-    Unrolling = Cost <= UnrollingLimit
-              ? CompleteUnrolling
-              : NoUnrolling
-  };
-};
-
-/***************************************************************************
-* Part 2 : unrollers
-***************************************************************************/
-
-/*** no vectorization ***/
-
-template<typename Derived, int Start, int Length>
-struct ei_sum_novec_unroller
-{
-  enum {
-    HalfLength = Length/2
-  };
-
-  typedef typename Derived::Scalar Scalar;
-
-  inline static Scalar run(const Derived &mat)
-  {
-    return ei_sum_novec_unroller<Derived, Start, HalfLength>::run(mat)
-         + ei_sum_novec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat);
-  }
-};
-
-template<typename Derived, int Start>
-struct ei_sum_novec_unroller<Derived, Start, 1>
-{
-  enum {
-    col = Start / Derived::RowsAtCompileTime,
-    row = Start % Derived::RowsAtCompileTime
-  };
-
-  typedef typename Derived::Scalar Scalar;
-
-  inline static Scalar run(const Derived &mat)
-  {
-    return mat.coeff(row, col);
-  }
-};
-
-/*** vectorization ***/
-
-template<typename Derived, int Index, int Stop,
-         bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived::Scalar>::size)>
-struct ei_sum_vec_unroller
-{
-  enum {
-    row = int(Derived::Flags)&RowMajorBit
-        ? Index / int(Derived::ColsAtCompileTime)
-        : Index % Derived::RowsAtCompileTime,
-    col = int(Derived::Flags)&RowMajorBit
-        ? Index % int(Derived::ColsAtCompileTime)
-        : Index / Derived::RowsAtCompileTime
-  };
-
-  typedef typename Derived::Scalar Scalar;
-  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
-  inline static PacketScalar run(const Derived &mat)
-  {
-    return ei_padd(
-      mat.template packet<Aligned>(row, col),
-      ei_sum_vec_unroller<Derived, Index+ei_packet_traits<typename Derived::Scalar>::size, Stop>::run(mat)
-    );
-  }
-};
-
-template<typename Derived, int Index, int Stop>
-struct ei_sum_vec_unroller<Derived, Index, Stop, true>
-{
-  enum {
-    row = int(Derived::Flags)&RowMajorBit
-        ? Index / int(Derived::ColsAtCompileTime)
-        : Index % Derived::RowsAtCompileTime,
-    col = int(Derived::Flags)&RowMajorBit
-        ? Index % int(Derived::ColsAtCompileTime)
-        : Index / Derived::RowsAtCompileTime,
-    alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
-  };
-
-  typedef typename Derived::Scalar Scalar;
-  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
-  inline static PacketScalar run(const Derived &mat)
-  {
-    return mat.template packet<alignment>(row, col);
-  }
-};
-
-/***************************************************************************
-* Part 3 : implementation of all cases
-***************************************************************************/
-
-template<typename Derived,
-         int Vectorization = ei_sum_traits<Derived>::Vectorization,
-         int Unrolling = ei_sum_traits<Derived>::Unrolling
->
-struct ei_sum_impl;
-
-template<typename Derived>
-struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
-{
-  typedef typename Derived::Scalar Scalar;
-  static Scalar run(const Derived& mat)
-  {
-    Scalar res;
-    res = mat.coeff(0, 0);
-    for(int i = 1; i < mat.rows(); i++)
-      res += mat.coeff(i, 0);
-    for(int j = 1; j < mat.cols(); j++)
-      for(int i = 0; i < mat.rows(); i++)
-        res += mat.coeff(i, j);
-    return res;
-  }
-};
-
-template<typename Derived>
-struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling>
-  : public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
-{};
-
-template<typename Derived>
-struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
-{
-  typedef typename Derived::Scalar Scalar;
-  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
-
-  static Scalar run(const Derived& mat)
-  {
-    const int size = mat.size();
-    const int packetSize = ei_packet_traits<Scalar>::size;
-    const int alignedStart =  (Derived::Flags & AlignedBit)
-                           || !(Derived::Flags & DirectAccessBit)
-                           ? 0
-                           : ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
-    enum {
-      alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
-                ? Aligned : Unaligned
-    };
-    const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
-    const int alignedEnd = alignedStart + alignedSize;
-    Scalar res;
-
-    if(alignedSize)
-    {
-      PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
-      for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
-        packet_res = ei_padd(packet_res, mat.template packet<alignment>(index));
-      res = ei_predux(packet_res);
-    }
-    else // too small to vectorize anything.
-         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
-    {
-      res = Scalar(0);
-    }
-
-    for(int index = 0; index < alignedStart; index++)
-      res += mat.coeff(index);
-
-    for(int index = alignedEnd; index < size; index++)
-      res += mat.coeff(index);
-
-    return res;
-  }
-};
-
-template<typename Derived>
-struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
-{
-  typedef typename Derived::Scalar Scalar;
-  static Scalar run(const Derived& mat)
-  {
-    return ei_predux(
-      ei_sum_vec_unroller<Derived, 0, Derived::SizeAtCompileTime>::run(mat)
-    );
-  }
-};
-
-/***************************************************************************
-* Part 4 : implementation of MatrixBase methods
-***************************************************************************/
-
-/** \returns the sum of all coefficients of *this
-  *
-  * \sa trace()
-  */
-template<typename Derived>
-inline typename ei_traits<Derived>::Scalar
-MatrixBase<Derived>::sum() const
-{
-  return ei_sum_impl<Derived>::run(derived());
-}
-
-/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
-  *
-  * \c *this can be any matrix, not necessarily square.
-  *
-  * \sa diagonal(), sum()
-  */
-template<typename Derived>
-inline typename ei_traits<Derived>::Scalar
-MatrixBase<Derived>::trace() const
-{
-  return diagonal().sum();
-}
-
-#endif // EIGEN_SUM_H
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -63,8 +63,6 @@ template<typename MatrixType> class Transpose

    EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)

-    class InnerIterator;
-
    inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {}

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
@@ -79,21 +77,21 @@ template<typename MatrixType> class Transpose
      return m_matrix.const_cast_derived().coeffRef(col, row);
    }

-    inline const Scalar coeff(int row, int col) const
+    inline Scalar& coeffRef(int index)
+    {
+      return m_matrix.const_cast_derived().coeffRef(index);
+    }
+
+    inline const CoeffReturnType coeff(int row, int col) const
    {
      return m_matrix.coeff(col, row);
    }

-    inline const Scalar coeff(int index) const
+    inline const CoeffReturnType coeff(int index) const
    {
      return m_matrix.coeff(index);
    }

-    inline Scalar& coeffRef(int index)
-    {
-      return m_matrix.const_cast_derived().coeffRef(index);
-    }
-
    template<int LoadMode>
    inline const PacketScalar packet(int row, int col) const
    {
@@ -127,7 +125,20 @@ template<typename MatrixType> class Transpose
  * Example: \include MatrixBase_transpose.cpp
  * Output: \verbinclude MatrixBase_transpose.out
  *
-  * \sa adjoint(), class DiagonalCoeffs */
+  * \warning If you want to replace a matrix by its own transpose, do \b NOT do this:
+  * \code
+  * m = m.transpose(); // bug!!! caused by aliasing effect
+  * \endcode
+  * Instead, use the transposeInPlace() method:
+  * \code
+  * m.transposeInPlace();
+  * \endcode
+  * which gives Eigen good opportunities for optimization, or alternatively you can also do:
+  * \code
+  * m = m.transpose().eval();
+  * \endcode
+  *
+  * \sa transposeInPlace(), adjoint() */
 template<typename Derived>
 inline Transpose<Derived>
 MatrixBase<Derived>::transpose()
@@ -135,7 +146,11 @@ MatrixBase<Derived>::transpose()
  return derived();
 }

-/** This is the const version of transpose(). \sa adjoint() */
+/** This is the const version of transpose().
+  *
+  * Make sure you read the warning for transpose() !
+  *
+  * \sa transposeInPlace(), adjoint() */
 template<typename Derived>
 inline const Transpose<Derived>
 MatrixBase<Derived>::transpose() const
@@ -148,7 +163,20 @@ MatrixBase<Derived>::transpose() const
  * Example: \include MatrixBase_adjoint.cpp
  * Output: \verbinclude MatrixBase_adjoint.out
  *
-  * \sa transpose(), conjugate(), class Transpose, class ei_scalar_conjugate_op */
+  * \warning If you want to replace a matrix by its own adjoint, do \b NOT do this:
+  * \code
+  * m = m.adjoint(); // bug!!! caused by aliasing effect
+  * \endcode
+  * Instead, use the adjointInPlace() method:
+  * \code
+  * m.adjointInPlace();
+  * \endcode
+  * which gives Eigen good opportunities for optimization, or alternatively you can also do:
+  * \code
+  * m = m.adjoint().eval();
+  * \endcode
+  *
+  * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class ei_scalar_conjugate_op */
 template<typename Derived>
 inline const typename MatrixBase<Derived>::AdjointReturnType
 MatrixBase<Derived>::adjoint() const
@@ -156,4 +184,81 @@ MatrixBase<Derived>::adjoint() const
  return conjugate().nestByValue();
 }

+/***************************************************************************
+* "in place" transpose implementation
+***************************************************************************/
+
+template<typename MatrixType,
+  bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
+struct ei_inplace_transpose_selector;
+
+template<typename MatrixType>
+struct ei_inplace_transpose_selector<MatrixType,true> { // square matrix
+  static void run(MatrixType& m) {
+    m.template part<StrictlyUpperTriangular>().swap(m.transpose());
+  }
+};
+
+template<typename MatrixType>
+struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix
+  static void run(MatrixType& m) {
+    if (m.rows()==m.cols())
+      m.template part<StrictlyUpperTriangular>().swap(m.transpose());
+    else
+      m = m.transpose().eval();
+  }
+};
+
+/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
+  * Thus, doing
+  * \code
+  * m.transposeInPlace();
+  * \endcode
+  * has the same effect on m as doing
+  * \code
+  * m = m.transpose().eval();
+  * \endcode
+  * and is faster and also safer because in the latter line of code, forgetting the eval() results
+  * in a bug caused by aliasing.
+  *
+  * Notice however that this method is only useful if you want to replace a matrix by its own transpose.
+  * If you just need the transpose of a matrix, use transpose().
+  *
+  * \note if the matrix is not square, then \c *this must be a resizable matrix.
+  *
+  * \sa transpose(), adjoint(), adjointInPlace() */
+template<typename Derived>
+inline void MatrixBase<Derived>::transposeInPlace()
+{
+  ei_inplace_transpose_selector<Derived>::run(derived());
+}
+
+/***************************************************************************
+* "in place" adjoint implementation
+***************************************************************************/
+
+/** This is the "in place" version of adjoint(): it replaces \c *this by its own adjoint.
+  * Thus, doing
+  * \code
+  * m.adjointInPlace();
+  * \endcode
+  * has the same effect on m as doing
+  * \code
+  * m = m.adjoint().eval();
+  * \endcode
+  * and is faster and also safer because in the latter line of code, forgetting the eval() results
+  * in a bug caused by aliasing.
+  *
+  * Notice however that this method is only useful if you want to replace a matrix by its own adjoint.
+  * If you just need the adjoint of a matrix, use adjoint().
+  *
+  * \note if the matrix is not square, then \c *this must be a resizable matrix.
+  *
+  * \sa transpose(), adjoint(), transposeInPlace() */
+template<typename Derived>
+inline void MatrixBase<Derived>::adjointInPlace()
+{
+  derived() = adjoint().eval();
+}
+
 #endif // EIGEN_TRANSPOSE_H
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -55,10 +55,10 @@ struct ei_visitor_impl<Visitor, Derived, Dynamic>
  inline static void run(const Derived& mat, Visitor& visitor)
  {
    visitor.init(mat.coeff(0,0), 0, 0);
-    for(int i = 1; i < mat.rows(); i++)
+    for(int i = 1; i < mat.rows(); ++i)
      visitor(mat.coeff(i, 0), i, 0);
-    for(int j = 1; j < mat.cols(); j++)
-      for(int i = 0; i < mat.rows(); i++)
+    for(int j = 1; j < mat.cols(); ++j)
+      for(int i = 0; i < mat.rows(); ++i)
        visitor(mat.coeff(i, j), i, j);
  }
 };
--- a/Eigen/src/Core/arch/AltiVec/CMakeLists.txt
+++ b/Eigen/src/Core/arch/AltiVec/CMakeLists.txt
@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h")

 INSTALL(FILES
  ${Eigen_Core_arch_AltiVec_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec
-)
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel
+)
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -45,8 +45,10 @@ typedef __vector __bool int     v4bi;
 #define USE_CONST_v1i_    const v4ui  v1i_  = vec_splat_u32(-1)
 #define USE_CONST_v0f_    USE_CONST_v1i_; const v4f v0f_ = (v4f) vec_sl(v1i_, v1i_)

-template<> struct ei_packet_traits<float>  { typedef v4f type; enum {size=4}; };
-template<> struct ei_packet_traits<int>    { typedef v4i type; enum {size=4}; };
+template<> struct ei_packet_traits<float> : ei_default_packet_traits
+{ typedef v4f type; enum {size=4}; };
+template<> struct ei_packet_traits<int>   : ei_default_packet_traits
+{ typedef v4i type; enum {size=4}; };

 template<> struct ei_unpacket_traits<v4f>  { typedef float  type; enum {size=4}; };
 template<> struct ei_unpacket_traits<v4i>  { typedef int    type; enum {size=4}; };
@@ -101,6 +103,18 @@ template<> inline v4i  ei_padd(const v4i&   a, const v4i&   b) { return vec_add(
 template<> inline v4f  ei_psub(const v4f&   a, const v4f&   b) { return vec_sub(a,b); }
 template<> inline v4i  ei_psub(const v4i&   a, const v4i&   b) { return vec_sub(a,b); }

+template<> EIGEN_STRONG_INLINE v4f ei_pnegate(const v4f& a)
+{
+  v4i mask = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+  return vec_xor(a,(v4f) mask);
+}
+
+template<> EIGEN_STRONG_INLINE v4i ei_pnegate(const v4i& a)
+{
+  USE_CONST_v0i;
+  return ei_psub(v0i, a);
+}
+
 template<> inline v4f  ei_pmul(const v4f&   a, const v4f&   b) { USE_CONST_v0f; return vec_madd(a,b, v0f); }
 template<> inline v4i  ei_pmul(const v4i&   a, const v4i&   b)
 {
@@ -111,7 +125,7 @@ template<> inline v4i  ei_pmul(const v4i&   a, const v4i&   b)
  USE_CONST_v1i;
  USE_CONST_v16i_;

-  // Get the absolute values 
+  // Get the absolute values
  a1  = vec_abs(a);
  b1  = vec_abs(b);

@@ -146,7 +160,7 @@ template<> inline v4f  ei_pdiv(const v4f&   a, const v4f&   b) {

  // Altivec does not offer a divide instruction, we have to do a reciprocal approximation
  y_0 = vec_re(b);
-  
+
  // Do one Newton-Raphson iteration to get the needed accuracy
  t = vec_nmsub(y_0, b, v1f);
  y_1 = vec_madd(y_0, t, y_0);
@@ -163,6 +177,9 @@ template<> inline v4i  ei_pmin(const v4i&   a, const v4i&   b) { return vec_min(
 template<> inline v4f  ei_pmax(const v4f&   a, const v4f&   b) { return vec_max(a,b); }
 template<> inline v4i  ei_pmax(const v4i&   a, const v4i&   b) { return vec_max(a,b); }

+template<> EIGEN_STRONG_INLINE v4f ei_pabs(const v4f& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE v4i ei_pabs(const v4i& a) { return vec_abs(a); }
+
 template<> inline v4f  ei_pload(const float* from) { return vec_ld(0, from); }
 template<> inline v4i  ei_pload(const int*   from) { return vec_ld(0, from); }

@@ -177,7 +194,7 @@ template<> inline v4f  ei_ploadu(const float*  from)
  return (v4f) vec_perm(MSQ, LSQ, mask);           // align the data
 }

-template<> inline v4i    ei_ploadu(const int*    from)
+template<> inline v4i  ei_ploadu(const int*    from)
 {
  // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
  __vector unsigned char MSQ, LSQ;
@@ -198,7 +215,7 @@ template<> inline v4f  ei_pset1(const float&  from)
  return vc;
 }

-template<> inline v4i    ei_pset1(const int&    from)
+template<> inline v4i  ei_pset1(const int&    from)
 {
  int __attribute__(aligned(16)) ai[4];
  ai[0] = from;
@@ -248,18 +265,31 @@ template<> inline void ei_pstoreu(int*    to , const v4i&    from )

 template<> inline float  ei_pfirst(const v4f&  a)
 {
-  float __attribute__(aligned(16)) af[4];
+  float EIGEN_ALIGN_128 af[4];
  vec_st(a, 0, af);
  return af[0];
 }

 template<> inline int    ei_pfirst(const v4i&  a)
 {
-  int __attribute__(aligned(16)) ai[4];
+  int EIGEN_ALIGN_128 ai[4];
  vec_st(a, 0, ai);
  return ai[0];
 }

+template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a)
+{
+  static const __vector unsigned char reverse_mask =
+    {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
+  return (v4f)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
+}
+template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a) // returns a with its four int lanes in reverse order
+{
+  static const __vector unsigned char reverse_mask = // byte indices for vec_perm: lane 3, 2, 1, 0 (same mask as the v4f overload)
+    {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
+  return (v4i)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
+}
+
 inline v4f ei_preduxp(const v4f* vecs)
 {
  v4f v[4], sum[4];
@@ -287,6 +317,33 @@ inline v4f ei_preduxp(const v4f* vecs)
  return sum[0];
 }

+inline v4i  ei_preduxp(const v4i* vecs)
+{
+  v4i v[4], sum[4];
+
+  // It's easier and faster to transpose then add as columns
+  // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
+  // Do the transpose, first set of moves
+  v[0] = vec_mergeh(vecs[0], vecs[2]);
+  v[1] = vec_mergel(vecs[0], vecs[2]);
+  v[2] = vec_mergeh(vecs[1], vecs[3]);
+  v[3] = vec_mergel(vecs[1], vecs[3]);
+  // Get the resulting vectors
+  sum[0] = vec_mergeh(v[0], v[2]);
+  sum[1] = vec_mergel(v[0], v[2]);
+  sum[2] = vec_mergeh(v[1], v[3]);
+  sum[3] = vec_mergel(v[1], v[3]);
+
+  // Now do the summation:
+  // Lines 0+1
+  sum[0] = vec_add(sum[0], sum[1]);
+  // Lines 2+3
+  sum[1] = vec_add(sum[2], sum[3]);
+  // Add the results
+  sum[0] = vec_add(sum[0], sum[1]);
+  return sum[0];
+}
+
 inline float ei_predux(const v4f& a)
 {
  v4f b, sum;
@@ -297,33 +354,6 @@ inline float ei_predux(const v4f& a)
  return ei_pfirst(sum);
 }

-inline v4i  ei_preduxp(const v4i* vecs)
-{
-  v4i v[4], sum[4];
-
-  // It's easier and faster to transpose then add as columns
-  // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
-  // Do the transpose, first set of moves
-  v[0] = vec_mergeh(vecs[0], vecs[2]);
-  v[1] = vec_mergel(vecs[0], vecs[2]);
-  v[2] = vec_mergeh(vecs[1], vecs[3]);
-  v[3] = vec_mergel(vecs[1], vecs[3]);
-  // Get the resulting vectors
-  sum[0] = vec_mergeh(v[0], v[2]);
-  sum[1] = vec_mergel(v[0], v[2]);
-  sum[2] = vec_mergeh(v[1], v[3]);
-  sum[3] = vec_mergel(v[1], v[3]);
-
-  // Now do the summation:
-  // Lines 0+1
-  sum[0] = vec_add(sum[0], sum[1]);
-  // Lines 2+3
-  sum[1] = vec_add(sum[2], sum[3]);
-  // Add the results
-  sum[0] = vec_add(sum[0], sum[1]);
-  return sum[0];
-}
-
 inline int ei_predux(const v4i& a)
 {
  USE_CONST_v0i;
@@ -333,6 +363,55 @@ inline int ei_predux(const v4i& a)
  return ei_pfirst(sum);
 }

+// implement other reductions operators
+inline float ei_predux_mul(const v4f& a)
+{
+  v4f prod;
+  prod = ei_pmul(a, (v4f)vec_sld(a, a, 8));
+  return ei_pfirst(ei_pmul(prod, (v4f)vec_sld(prod, prod, 4)));
+}
+
+inline int ei_predux_mul(const v4i& a)
+{
+  EIGEN_ALIGN_128 int aux[4];
+  ei_pstore(aux, a);
+  return aux[0] * aux[1] * aux[2] * aux[3];
+}
+
+inline float ei_predux_min(const v4f& a)
+{
+  v4f b, res;
+  b = vec_min(a, vec_sld(a, a, 8));
+  res = vec_min(b, vec_sld(b, b, 4));
+  return ei_pfirst(res);
+}
+
+inline int ei_predux_min(const v4i& a)
+{
+  v4i b, res;
+  b = vec_min(a, vec_sld(a, a, 8));
+  res = vec_min(b, vec_sld(b, b, 4));
+  return ei_pfirst(res);
+}
+
+inline float ei_predux_max(const v4f& a)
+{
+  v4f b, res;
+  b = vec_max(a, vec_sld(a, a, 8));
+  res = vec_max(b, vec_sld(b, b, 4));
+  return ei_pfirst(res);
+}
+
+inline int ei_predux_max(const v4i& a)
+{
+  v4i b, res;
+  b = vec_max(a, vec_sld(a, a, 8));
+  res = vec_max(b, vec_sld(b, b, 4));
+  return ei_pfirst(res);
+}
+
+
+
 template<int Offset>
 struct ei_palign_impl<Offset, v4f>
 {
--- a/Eigen/src/Core/arch/SSE/CMakeLists.txt
+++ b/Eigen/src/Core/arch/SSE/CMakeLists.txt
@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h")

 INSTALL(FILES
  ${Eigen_Core_arch_SSE_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE
-)
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel
+)
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -0,0 +1,376 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2007 Julien Pommier
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H
+
+static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_plog(Packet4f x)
+{
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+  /* the smallest non denormalized float number */
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
+
+  /* natural logarithm computed for 4 simultaneous float
+    return NaN for x <= 0
+  */
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+
+
+  Packet4i emm0;
+
+  Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
+
+  x = ei_pmax(x, ei_p4f_min_norm_pos);  /* cut off denormalized stuff */
+  emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+
+  /* keep only the fractional part */
+  x = _mm_and_ps(x, ei_p4f_inv_mant_mask);
+  x = _mm_or_ps(x, ei_p4f_half);
+
+  emm0 = _mm_sub_epi32(emm0, ei_p4i_0x7f);
+  Packet4f e = ei_padd(_mm_cvtepi32_ps(emm0), ei_p4f_1);
+
+  /* part2:
+     if( x < SQRTHF ) {
+       e -= 1;
+       x = x + x - 1.0;
+     } else { x = x - 1.0; }
+  */
+  Packet4f mask = _mm_cmplt_ps(x, ei_p4f_cephes_SQRTHF);
+  Packet4f tmp = _mm_and_ps(x, mask);
+  x = ei_psub(x, ei_p4f_1);
+  e = ei_psub(e, _mm_and_ps(ei_p4f_1, mask));
+  x = ei_padd(x, tmp);
+
+  Packet4f x2 = ei_pmul(x,x);
+  Packet4f x3 = ei_pmul(x2,x);
+
+  Packet4f y, y1, y2;
+  y  = ei_pmadd(ei_p4f_cephes_log_p0, x, ei_p4f_cephes_log_p1);
+  y1 = ei_pmadd(ei_p4f_cephes_log_p3, x, ei_p4f_cephes_log_p4);
+  y2 = ei_pmadd(ei_p4f_cephes_log_p6, x, ei_p4f_cephes_log_p7);
+  y  = ei_pmadd(y , x, ei_p4f_cephes_log_p2);
+  y1 = ei_pmadd(y1, x, ei_p4f_cephes_log_p5);
+  y2 = ei_pmadd(y2, x, ei_p4f_cephes_log_p8);
+  y = ei_pmadd(y, x3, y1);
+  y = ei_pmadd(y, x3, y2);
+  y = ei_pmul(y, x3);
+
+  y1 = ei_pmul(e, ei_p4f_cephes_log_q1);
+  tmp = ei_pmul(x2, ei_p4f_half);
+  y = ei_padd(y, y1);
+  x = ei_psub(x, tmp);
+  y2 = ei_pmul(e, ei_p4f_cephes_log_q2);
+  x = ei_padd(x, y);
+  x = ei_padd(x, y2);
+  return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
+}
+
+static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pexp(Packet4f x)
+{
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+
+
+  _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+
+  Packet4f tmp = _mm_setzero_ps(), fx;
+  Packet4i emm0;
+
+  // clamp x
+  x = ei_pmax(ei_pmin(x, ei_p4f_exp_hi), ei_p4f_exp_lo);
+
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = ei_pmadd(x, ei_p4f_cephes_LOG2EF, ei_p4f_half);
+
+  /* how to perform a floorf with SSE: just below */
+  emm0 = _mm_cvttps_epi32(fx);
+  tmp  = _mm_cvtepi32_ps(emm0);
+  /* if greater, subtract 1 */
+  Packet4f mask = _mm_cmpgt_ps(tmp, fx);
+  mask = _mm_and_ps(mask, ei_p4f_1);
+  fx = ei_psub(tmp, mask);
+
+  tmp = ei_pmul(fx, ei_p4f_cephes_exp_C1);
+  Packet4f z = ei_pmul(fx, ei_p4f_cephes_exp_C2);
+  x = ei_psub(x, tmp);
+  x = ei_psub(x, z);
+
+  z = ei_pmul(x,x);
+
+  Packet4f y = ei_p4f_cephes_exp_p0;
+  y = ei_pmadd(y, x, ei_p4f_cephes_exp_p1);
+  y = ei_pmadd(y, x, ei_p4f_cephes_exp_p2);
+  y = ei_pmadd(y, x, ei_p4f_cephes_exp_p3);
+  y = ei_pmadd(y, x, ei_p4f_cephes_exp_p4);
+  y = ei_pmadd(y, x, ei_p4f_cephes_exp_p5);
+  y = ei_pmadd(y, z, x);
+  y = ei_padd(y, ei_p4f_1);
+
+  /* build 2^n */
+  emm0 = _mm_cvttps_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, ei_p4i_0x7f);
+  emm0 = _mm_slli_epi32(emm0, 23);
+  return ei_pmul(y, _mm_castsi128_ps(emm0));
+}
+
+/* evaluation of 4 sines at once, using SSE2 intrinsics.
+
+   The code is the exact rewriting of the cephes sinf function.
+   Precision is excellent as long as x < 8192 (I did not bother to
+   take into account the special handling they have for greater values
+   -- it does not return garbage for arguments over 8192, though, but
+   the extra precision is missing).
+
+   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
+   surprising but correct result.
+*/
+
+static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_psin(Packet4f x)
+{
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+
+  _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+  _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+  _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+  _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
+  /* DP1..DP3 are stored negated (the reduction below uses additions); then the two sets of polynomial coefficients */
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p1,  8.3321608736E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p0,  2.443315711809948E-005f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
+
+  Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
+  /* note: the zero stored in xmm2 above is overwritten before xmm2 is ever read */
+  Packet4i emm0, emm2;
+  sign_bit = x; /* keep the signed input: its sign bit is extracted below */
+  /* take the absolute value; the sign is re-applied by the final xor */
+  x = ei_pabs(x);
+
+  /* (the reduction of x is done further below, in the "magic pass") */
+
+  /* extract the sign bit (upper one) */
+  sign_bit = _mm_and_ps(sign_bit, ei_p4f_sign_mask);
+
+  /* scale by 4/Pi */
+  y = ei_pmul(x, ei_p4f_cephes_FOPI);
+
+  /* store the integer part of y in emm2 (conversion with truncation) */
+  emm2 = _mm_cvttps_epi32(y);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = _mm_add_epi32(emm2, ei_p4i_1);
+  emm2 = _mm_and_si128(emm2, ei_p4i_not1);
+  y = _mm_cvtepi32_ps(emm2);
+  /* get the swap sign flag: bit 2 of j (value 4) is moved to the sign bit position by the 29-bit shift */
+  emm0 = _mm_and_si128(emm2, ei_p4i_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynomial selection mask:
+     there is one polynomial for 0 <= x <= Pi/4
+     and another one for Pi/4<x<=Pi/2
+
+     Both branches will be computed; emm2 becomes all ones where (j & 2) == 0.
+  */
+  emm2 = _mm_and_si128(emm2, ei_p4i_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+  Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
+  Packet4f poly_mask = _mm_castsi128_ps(emm2);
+  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+
+  /* The magic pass: "Extended precision modular arithmetic"
+     x = ((x - y * DP1) - y * DP2) - y * DP3; */
+  xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1);
+  xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2);
+  xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3);
+  x = ei_padd(x, xmm1);
+  x = ei_padd(x, xmm2);
+  x = ei_padd(x, xmm3);
+
+  /* Evaluate the first polynomial (coscof coefficients): y = 1 - z/2 + z*z*((p0*z + p1)*z + p2), with z = x*x */
+  y = ei_p4f_coscof_p0;
+  Packet4f z = _mm_mul_ps(x,x);
+
+  y = ei_pmadd(y, z, ei_p4f_coscof_p1);
+  y = ei_pmadd(y, z, ei_p4f_coscof_p2);
+  y = ei_pmul(y, z);
+  y = ei_pmul(y, z);
+  Packet4f tmp = ei_pmul(z, ei_p4f_half);
+  y = ei_psub(y, tmp);
+  y = ei_padd(y, ei_p4f_1);
+
+  /* Evaluate the second polynomial (sincof coefficients): y2 = x + x*z*((p0*z + p1)*z + p2) */
+
+  Packet4f y2 = ei_p4f_sincof_p0;
+  y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1);
+  y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2);
+  y2 = ei_pmul(y2, z);
+  y2 = ei_pmul(y2, x);
+  y2 = ei_padd(y2, x);
+
+  /* select the correct result from the two polynomials: y2 where poly_mask is set, y elsewhere */
+  y2 = _mm_and_ps(poly_mask, y2);
+  y = _mm_andnot_ps(poly_mask, y);
+  y = _mm_or_ps(y,y2);
+  /* update the sign */
+  return _mm_xor_ps(y, sign_bit);
+}
+
+/* evaluation of 4 cosines at once; almost the same as ei_psin, except for the index shift and the sign handling */
+static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pcos(Packet4f x)
+{
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+
+  _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+  _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+  _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+  _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p1,  8.3321608736E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p0,  2.443315711809948E-005f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
+
+  Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
+  Packet4i emm0, emm2;
+  /* only |x| is used: unlike ei_psin, the sign of the input is not kept */
+  x = ei_pabs(x);
+
+  /* scale by 4/Pi */
+  y = ei_pmul(x, ei_p4f_cephes_FOPI);
+
+  /* get the integer part of y (conversion with truncation) */
+  emm2 = _mm_cvttps_epi32(y);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = _mm_add_epi32(emm2, ei_p4i_1);
+  emm2 = _mm_and_si128(emm2, ei_p4i_not1);
+  y = _mm_cvtepi32_ps(emm2);
+
+  emm2 = _mm_sub_epi32(emm2, ei_p4i_2);
+
+  /* get the swap sign flag: (~emm2) & 4, moved to the sign bit position by the 29-bit shift */
+  emm0 = _mm_andnot_si128(emm2, ei_p4i_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynomial selection mask: all ones where (emm2 & 2) == 0 */
+  emm2 = _mm_and_si128(emm2, ei_p4i_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+  Packet4f sign_bit = _mm_castsi128_ps(emm0);
+  Packet4f poly_mask = _mm_castsi128_ps(emm2);
+
+  /* The magic pass: "Extended precision modular arithmetic"
+     x = ((x - y * DP1) - y * DP2) - y * DP3; */
+  xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1);
+  xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2);
+  xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3);
+  x = ei_padd(x, xmm1);
+  x = ei_padd(x, xmm2);
+  x = ei_padd(x, xmm3);
+
+  /* Evaluate the first polynomial (coscof coefficients): y = 1 - z/2 + z*z*((p0*z + p1)*z + p2), with z = x*x */
+  y = ei_p4f_coscof_p0;
+  Packet4f z = ei_pmul(x,x);
+
+  y = ei_pmadd(y,z,ei_p4f_coscof_p1);
+  y = ei_pmadd(y,z,ei_p4f_coscof_p2);
+  y = ei_pmul(y, z);
+  y = ei_pmul(y, z);
+  Packet4f tmp = _mm_mul_ps(z, ei_p4f_half);
+  y = ei_psub(y, tmp);
+  y = ei_padd(y, ei_p4f_1);
+
+  /* Evaluate the second polynomial (sincof coefficients): y2 = x + x*z*((p0*z + p1)*z + p2) */
+  Packet4f y2 = ei_p4f_sincof_p0;
+  y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1);
+  y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2);
+  y2 = ei_pmul(y2, z);
+  y2 = ei_pmadd(y2, x, x);
+
+  /* select the correct result from the two polynomials: y2 where poly_mask is set, y elsewhere */
+  y2 = _mm_and_ps(poly_mask, y2);
+  y  = _mm_andnot_ps(poly_mask, y);
+  y  = _mm_or_ps(y,y2);
+
+  /* update the sign */
+  return _mm_xor_ps(y, sign_bit);
+}
+/* Fast packet sqrt: _x * rsqrt(_x), the rsqrt estimate being refined by one Newton-Raphson step. */
+static EIGEN_UNUSED Packet4f ei_psqrt(Packet4f _x)
+{
+  Packet4f half = ei_pmul(_x, ei_pset1(.5f));
+  /* rsqrt is inf for zero/denormal input (which gave NaN/-inf): zero the estimate when |_x| < FLT_MIN so the result is 0 */
+  Packet4f x = _mm_and_ps(_mm_cmpge_ps(ei_pabs(_x), ei_pset1(1.17549435e-38f)), _mm_rsqrt_ps(_x));
+  return ei_pmul(_x, ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x)))));
+}
+
+#endif // EIGEN_MATH_FUNCTIONS_SSE_H
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -29,140 +29,245 @@
 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 16
 #endif

-template<> struct ei_packet_traits<float>  { typedef __m128  type; enum {size=4}; };
-template<> struct ei_packet_traits<double> { typedef __m128d type; enum {size=2}; };
-template<> struct ei_packet_traits<int>    { typedef __m128i type; enum {size=4}; };
+typedef __m128  Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;

-template<> struct ei_unpacket_traits<__m128>  { typedef float  type; enum {size=4}; };
-template<> struct ei_unpacket_traits<__m128d> { typedef double type; enum {size=2}; };
-template<> struct ei_unpacket_traits<__m128i> { typedef int    type; enum {size=4}; };
+#define ei_vec4f_swizzle1(v,p,q,r,s) \
+  (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))

-template<> inline __m128  ei_padd(const __m128&  a, const __m128&  b) { return _mm_add_ps(a,b); }
-template<> inline __m128d ei_padd(const __m128d& a, const __m128d& b) { return _mm_add_pd(a,b); }
-template<> inline __m128i ei_padd(const __m128i& a, const __m128i& b) { return _mm_add_epi32(a,b); }
+#define ei_vec4i_swizzle1(v,p,q,r,s) \
+  (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))

-template<> inline __m128  ei_psub(const __m128&  a, const __m128&  b) { return _mm_sub_ps(a,b); }
-template<> inline __m128d ei_psub(const __m128d& a, const __m128d& b) { return _mm_sub_pd(a,b); }
-template<> inline __m128i ei_psub(const __m128i& a, const __m128i& b) { return _mm_sub_epi32(a,b); }
+#define ei_vec4f_swizzle2(a,b,p,q,r,s) \
+  (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))

-template<> inline __m128  ei_pmul(const __m128&  a, const __m128&  b) { return _mm_mul_ps(a,b); }
-template<> inline __m128d ei_pmul(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
-template<> inline __m128i ei_pmul(const __m128i& a, const __m128i& b)
+#define ei_vec4i_swizzle2(a,b,p,q,r,s) \
+  (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
+// Declare a constant packet named ei_p4f_NAME whose coefficients are all set to the float X:
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+  const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
+// Same, but X is an int whose bits are reinterpreted (not converted) as a float:
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+  const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<int>(X))
+// Declare a constant integer packet named ei_p4i_NAME whose coefficients are all set to X:
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+  const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
+
+template<> struct ei_packet_traits<float>  : ei_default_packet_traits
 {
-  return _mm_or_si128(
-    _mm_and_si128(
-      _mm_mul_epu32(a,b),
-      _mm_setr_epi32(0xffffffff,0,0xffffffff,0)),
-    _mm_slli_si128(
-      _mm_and_si128(
-        _mm_mul_epu32(_mm_srli_si128(a,4),_mm_srli_si128(b,4)),
-        _mm_setr_epi32(0xffffffff,0,0xffffffff,0)), 4));
+  typedef Packet4f type; enum {size=4};
+  enum {
+    HasSin  = 1,
+    HasCos  = 1,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1
+  };
+};
+template<> struct ei_packet_traits<double> : ei_default_packet_traits
+{ typedef Packet2d type; enum {size=2}; };
+template<> struct ei_packet_traits<int>    : ei_default_packet_traits
+{ typedef Packet4i type; enum {size=4}; };
+
+template<> struct ei_unpacket_traits<Packet4f> { typedef float  type; enum {size=4}; };
+template<> struct ei_unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
+template<> struct ei_unpacket_traits<Packet4i> { typedef int    type; enum {size=4}; };
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float&  from) { return _mm_set1_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int&    from) { return _mm_set1_epi32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+  return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ei_pnegate(const Packet2d& a)
+{
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+  return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a)
+{
+  return ei_psub(_mm_setr_epi32(0,0,0,0), a);
 }

-template<> inline __m128  ei_pdiv(const __m128&  a, const __m128&  b) { return _mm_div_ps(a,b); }
-template<> inline __m128d ei_pdiv(const __m128d& a, const __m128d& b) { return _mm_div_pd(a,b); }
-template<> inline __m128i ei_pdiv(const __m128i& /*a*/, const __m128i& /*b*/)
+template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+  // coefficient-wise product of two int packets; this version is slightly faster than 4 scalar products
+  return ei_vec4i_swizzle1(
+            ei_vec4i_swizzle2(
+              _mm_mul_epu32(a,b),                           // 64-bit products of lanes 0 and 2
+              _mm_mul_epu32(ei_vec4i_swizzle1(a,1,0,3,2),   // lanes 1 and 3 are first swapped into
+                            ei_vec4i_swizzle1(b,1,0,3,2)),  // the even positions, then multiplied
+              0,2,0,2),                                     // keep the low 32 bits of each product: p0,p2,p1,p3
+            0,2,1,3);                                       // reorder to p0,p1,p2,p3
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
 { ei_assert(false && "packet integer division are not supported by SSE");
-  __m128i dummy;
-  return dummy;
+  return ei_pset1<int>(0);
 }

-// for some weird raisons, it has to be overloaded for packet integer
-template<> inline __m128i ei_pmadd(const __m128i& a, const __m128i& b, const __m128i& c) { return ei_padd(ei_pmul(a,b), c); }
+// for some weird reasons, it has to be overloaded for packet of integers
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); }

-template<> inline __m128  ei_pmin(const __m128&  a, const __m128&  b) { return _mm_min_ps(a,b); }
-template<> inline __m128d ei_pmin(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); }
-// FIXME this vectorized min operator is likely to be slower than the standard one
-template<> inline __m128i ei_pmin(const __m128i& a, const __m128i& b)
+template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
 {
-  __m128i mask = _mm_cmplt_epi32(a,b);
+  // after some bench, this version *is* faster than a scalar implementation
+  Packet4i mask = _mm_cmplt_epi32(a,b);
  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
 }

-template<> inline __m128  ei_pmax(const __m128&  a, const __m128&  b) { return _mm_max_ps(a,b); }
-template<> inline __m128d ei_pmax(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); }
-// FIXME this vectorized max operator is likely to be slower than the standard one
-template<> inline __m128i ei_pmax(const __m128i& a, const __m128i& b)
+template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
 {
-  __m128i mask = _mm_cmpgt_epi32(a,b);
+  // after some bench, this version *is* faster than a scalar implementation
+  Packet4i mask = _mm_cmpgt_epi32(a,b);
  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
 }

-template<> inline __m128  ei_pload(const float*   from) { return _mm_load_ps(from); }
-template<> inline __m128d ei_pload(const double*  from) { return _mm_load_pd(from); }
-template<> inline __m128i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }

-template<> inline __m128  ei_ploadu(const float*   from) { return _mm_loadu_ps(from); }
-// template<> inline __m128  ei_ploadu(const float*   from) {
-//   if (size_t(from)&0xF)
-//     return _mm_loadu_ps(from);
-//   else 
-//     return _mm_loadu_ps(from);
-// }
-template<> inline __m128d ei_ploadu(const double*  from) { return _mm_loadu_pd(from); }
-template<> inline __m128i ei_ploadu(const int* from) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }

-template<> inline __m128  ei_pset1(const float&  from) { return _mm_set1_ps(from); }
-template<> inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); }
-template<> inline __m128i ei_pset1(const int&    from) { return _mm_set1_epi32(from); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }

-template<> inline void ei_pstore(float*  to, const __m128&  from) { _mm_store_ps(to, from); }
-template<> inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); }
-template<> inline void ei_pstore(int*    to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); } // NOTE(review): _mm_andnot_* negates its FIRST operand, i.e. this is (~a)&b; confirm it matches the generic ei_pandnot contract
+template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); } // same operand order as the Packet4f version
+template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); } // same operand order as the Packet4f version

-template<> inline void ei_pstoreu(float*  to, const __m128&  from) { _mm_storeu_ps(to, from); }
-template<> inline void ei_pstoreu(double* to, const __m128d& from) { _mm_storeu_pd(to, from); }
-template<> inline void ei_pstoreu(int*    to, const __m128i& from) { _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float*    from) { return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double*  from) { return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float*   from) {
+  // Unaligned load of 4 floats, performed as two 64-bit loads (no alignment requirement on from).
+  Packet4f r = _mm_castpd_ps(_mm_load_sd((const double*)(from))); // low half: from[0], from[1]
+  r = _mm_loadh_pi(r, (const __m64*)(from+2));                    // high half: from[2], from[3]
+  return r;
+}
+template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double*  from) { return _mm_castps_pd(ei_ploadu((const float*)(from))); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { return _mm_castpd_si128(ei_ploadu((const double*)(from))); }
+
+template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float*   to, const Packet4f& from) { _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int*       to, const Packet4i& from) { _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
+
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2d& from) {
+  _mm_storel_pd((to), from);   // lower double  -> to[0]
+  _mm_storeh_pd((to+1), from); // upper double  -> to[1]; two 64-bit stores instead of one 128-bit store
+}
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float*  to, const Packet4f& from) { ei_pstoreu((double*)to, _mm_castps_pd(from)); }
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int*      to, const Packet4i& from) { ei_pstoreu((double*)to, _mm_castsi128_pd(from)); }
+
+#ifdef _MSC_VER
+// this works around an internal compilation error (MSVC only)
+template<> EIGEN_STRONG_INLINE float  ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int    ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float  ei_pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int    ei_pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); } // 0x1B = (0<<6)|(1<<4)|(2<<2)|3: result lanes are a3,a2,a1,a0
+template<> EIGEN_STRONG_INLINE Packet2d ei_preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); } // 0x1: result lanes are a1,a0
+template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); } // same selector as the Packet4f version: a3,a2,a1,a0
+
+
+template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF)); // every bit but the sign bit
+  return _mm_and_ps(a,mask); // clears the sign bit of each float
+}
+template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
+{
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF)); // per double: low word kept, high word without its top (sign) bit
+  return _mm_and_pd(a,mask); // clears the sign bit of each double
+}
+template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
+{
+  #ifdef __SSSE3__
+  return _mm_abs_epi32(a); // dedicated instruction when SSSE3 is enabled
+  #else
+  Packet4i aux = _mm_srai_epi32(a,31); // arithmetic shift: all ones for negative lanes, zero otherwise
+  return _mm_sub_epi32(_mm_xor_si128(a,aux),aux); // (a ^ aux) - aux: negates the negative lanes, leaves the others untouched
+  #endif
+}

-template<> inline float  ei_pfirst(const __m128&  a) { return _mm_cvtss_f32(a); }
-template<> inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); }
-template<> inline int    ei_pfirst(const __m128i& a) { return _mm_cvtsi128_si32(a); }

 #ifdef __SSE3__
 // TODO implement SSE2 versions as well as integer versions
-inline __m128 ei_preduxp(const __m128* vecs)
+template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
 {
  return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
 }
-inline __m128d ei_preduxp(const __m128d* vecs)
+template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs)
 {
  return _mm_hadd_pd(vecs[0], vecs[1]);
 }
 // SSSE3 version:
-// inline __m128i ei_preduxp(const __m128i* vecs)
+// EIGEN_STRONG_INLINE Packet4i ei_preduxp(const Packet4i* vecs)
 // {
 //   return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
 // }

-inline float ei_predux(const __m128& a)
+template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
 {
-  __m128 tmp0 = _mm_hadd_ps(a,a);
+  Packet4f tmp0 = _mm_hadd_ps(a,a);
  return ei_pfirst(_mm_hadd_ps(tmp0, tmp0));
 }

-inline double ei_predux(const __m128d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); }
+template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); }

 // SSSE3 version:
-// inline float ei_predux(const __m128i& a)
+// EIGEN_STRONG_INLINE float ei_predux(const Packet4i& a)
 // {
-//   __m128i tmp0 = _mm_hadd_epi32(a,a);
+//   Packet4i tmp0 = _mm_hadd_epi32(a,a);
 //   return ei_pfirst(_mm_hadd_epi32(tmp0, tmp0));
 // }
 #else
 // SSE2 versions
-inline float ei_predux(const __m128& a)
+template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
 {
-  __m128 tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+  Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
  return ei_pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
 }
-inline double ei_predux(const __m128d& a)
+template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a)
 {
  return ei_pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
 }

-inline __m128 ei_preduxp(const __m128* vecs)
+template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
 {
-  __m128 tmp0, tmp1, tmp2;
+  Packet4f tmp0, tmp1, tmp2;
  tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
  tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
  tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
@@ -174,21 +279,21 @@ inline __m128 ei_preduxp(const __m128* vecs)
  return _mm_add_ps(tmp0, tmp2);
 }

-inline __m128d ei_preduxp(const __m128d* vecs)
+template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs)
 {
  return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
 }
 #endif  // SSE3

-inline int ei_predux(const __m128i& a)
+template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
 {
-  __m128i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+  Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
  return ei_pfirst(tmp) + ei_pfirst(_mm_shuffle_epi32(tmp, 1));
 }

-inline __m128i ei_preduxp(const __m128i* vecs)
+template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs)
 {
-  __m128i tmp0, tmp1, tmp2;
+  Packet4i tmp0, tmp1, tmp2;
  tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
  tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
  tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
@@ -200,16 +305,80 @@ inline __m128i ei_preduxp(const __m128i* vecs)
  return _mm_add_epi32(tmp0, tmp2);
 }

+// Other reduction functions:
+
+// mul
+template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+  return ei_pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double ei_predux_mul<Packet2d>(const Packet2d& a)
+{
+  return ei_pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
+{
+  // Returns the product of the four ints of a. After some experiments, it seems that going through
+  // an aligned array is the fastest way to implement it for GCC (e.g., reusing ei_pmul is very slow!)
+  // TODO try to call _mm_mul_epu32 directly
+  EIGEN_ALIGN_128 int aux[4];
+  ei_pstore(aux, a);
+  return (aux[0] * aux[1]) * (aux[2] * aux[3]);
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+  return ei_pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double ei_predux_min<Packet2d>(const Packet2d& a)
+{
+  return ei_pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
+{
+  // Returns the smallest of the four ints of a. After some experiments, it seems this is the fastest
+  // way to implement it for GCC (e.g., it does not like using std::min after the ei_pstore !!)
+  EIGEN_ALIGN_128 int aux[4];
+  ei_pstore(aux, a);
+  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+  return aux0<aux2 ? aux0 : aux2;
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+  return ei_pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double ei_predux_max<Packet2d>(const Packet2d& a)
+{
+  return ei_pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
+{
+  // Returns the largest of the four ints of a. After some experiments, it seems this is the fastest
+  // way to implement it for GCC (e.g., it does not like using std::max after the ei_pstore !!)
+  EIGEN_ALIGN_128 int aux[4];
+  ei_pstore(aux, a);
+  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+  return aux0>aux2 ? aux0 : aux2;
+}
+
 #if (defined __GNUC__)
-// template <> inline __m128 ei_pmadd(const __m128&  a, const __m128&  b, const __m128&  c)
+// template <> EIGEN_STRONG_INLINE Packet4f ei_pmadd(const Packet4f&  a, const Packet4f&  b, const Packet4f&  c)
 // {
-//   __m128 res = b;
+//   Packet4f res = b;
 //   asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
 //   return res;
 // }
-// inline __m128i _mm_alignr_epi8(const __m128i&  a, const __m128i&  b, const int i)
+// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i&  a, const Packet4i&  b, const int i)
 // {
-//   __m128i res = a;
+//   Packet4i res = a;
 //   asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
 //   return res;
 // }
@@ -218,9 +387,9 @@ inline __m128i ei_preduxp(const __m128i* vecs)
 #ifdef __SSSE3__
 // SSSE3 versions
 template<int Offset>
-struct ei_palign_impl<Offset,__m128>
+struct ei_palign_impl<Offset,Packet4f>
 {
-  inline static void run(__m128& first, const __m128& second)
+  EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
  {
    if (Offset!=0)
      first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
@@ -228,9 +397,9 @@ struct ei_palign_impl<Offset,__m128>
 };

 template<int Offset>
-struct ei_palign_impl<Offset,__m128i>
+struct ei_palign_impl<Offset,Packet4i>
 {
-  inline static void run(__m128i& first, const __m128i& second)
+  EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
  {
    if (Offset!=0)
      first = _mm_alignr_epi8(second,first, Offset*4);
@@ -238,9 +407,9 @@ struct ei_palign_impl<Offset,__m128i>
 };

 template<int Offset>
-struct ei_palign_impl<Offset,__m128d>
+struct ei_palign_impl<Offset,Packet2d>
 {
-  inline static void run(__m128d& first, const __m128d& second)
+  EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset==1)
      first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
@@ -249,9 +418,9 @@ struct ei_palign_impl<Offset,__m128d>
 #else
 // SSE2 versions
 template<int Offset>
-struct ei_palign_impl<Offset,__m128>
+struct ei_palign_impl<Offset,Packet4f>
 {
-  inline static void run(__m128& first, const __m128& second)
+  EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
  {
    if (Offset==1)
    {
@@ -272,9 +441,9 @@ struct ei_palign_impl<Offset,__m128>
 };

 template<int Offset>
-struct ei_palign_impl<Offset,__m128i>
+struct ei_palign_impl<Offset,Packet4i>
 {
-  inline static void run(__m128i& first, const __m128i& second)
+  EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
  {
    if (Offset==1)
    {
@@ -295,9 +464,9 @@ struct ei_palign_impl<Offset,__m128i>
 };

 template<int Offset>
-struct ei_palign_impl<Offset,__m128d>
+struct ei_palign_impl<Offset,Packet2d>
 {
-  inline static void run(__m128d& first, const __m128d& second)
+  EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset==1)
    {
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -0,0 +1,490 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_H
+
+template <int L2MemorySize,typename Scalar>
+struct ei_L2_block_traits { // compile-time block width; used below as Max_kc / MaxL2BlockSize in ei_cache_friendly_product
+  enum {width = 8 * ei_meta_sqrt<L2MemorySize/(64*sizeof(Scalar))>::ret }; // NOTE(review): ei_meta_sqrt is presumably a compile-time integer square root - confirm
+};
+
+#ifndef EIGEN_EXTERN_INSTANTIATIONS
+
+template<typename Scalar>
+static void ei_cache_friendly_product(
+  int _rows, int _cols, int depth,
+  bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride,
+  bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride,
+  bool resRowMajor, Scalar* res, int resStride)
+{
+  const Scalar* EIGEN_RESTRICT lhs;
+  const Scalar* EIGEN_RESTRICT rhs;
+  int lhsStride, rhsStride, rows, cols;
+  bool lhsRowMajor;
+
+  if (resRowMajor)
+  {
+    lhs = _rhs;
+    rhs = _lhs;
+    lhsStride = _rhsStride;
+    rhsStride = _lhsStride;
+    cols = _rows;
+    rows = _cols;
+    lhsRowMajor = !_rhsRowMajor;
+    ei_assert(_lhsRowMajor);
+  }
+  else
+  {
+    lhs = _lhs;
+    rhs = _rhs;
+    lhsStride = _lhsStride;
+    rhsStride = _rhsStride;
+    rows = _rows;
+    cols = _cols;
+    lhsRowMajor = _lhsRowMajor;
+    ei_assert(!_rhsRowMajor);
+  }
+
+  typedef typename ei_packet_traits<Scalar>::type PacketType;
+
+
+
+#ifndef EIGEN_USE_ALT_PRODUCT
+
+  enum {
+    PacketSize = sizeof(PacketType)/sizeof(Scalar),
+    #if (defined __i386__)
+    HalfRegisterCount = 4,
+    #else
+    HalfRegisterCount = 8,
+    #endif
+
+    // register block size along the N direction
+    nr = HalfRegisterCount/2,
+
+    // register block size along the M direction
+    mr = 2 * PacketSize,
+
+    // max cache block size along the K direction
+    Max_kc = ei_L2_block_traits<EIGEN_TUNE_FOR_CPU_CACHE_SIZE,Scalar>::width,
+
+    // max cache block size along the M direction
+    Max_mc = 2*Max_kc
+  };
+
+  int kc = std::min<int>(Max_kc,depth);  // cache block size along the K direction
+  int mc = std::min<int>(Max_mc,rows);   // cache block size along the M direction
+
+  Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
+  Scalar* blockB = ei_aligned_stack_new(Scalar, kc*cols*PacketSize);
+
+  // number of columns that can be processed in packets of nr columns
+  int packet_cols = (cols/nr)*nr;
+
+  // GEMM_VAR1
+  for(int k2=0; k2<depth; k2+=kc)
+  {
+    const int actual_kc = std::min(k2+kc,depth)-k2;
+
+    // we have selected one row panel of rhs and one column panel of lhs
+    // pack rhs's panel into a sequential chunk of memory
+    // and expand each coeff to a constant packet for further reuse
+    {
+      int count = 0;
+      for(int j2=0; j2<packet_cols; j2+=nr)
+      {
+        const Scalar* b0 = &rhs[(j2+0)*rhsStride + k2];
+        const Scalar* b1 = &rhs[(j2+1)*rhsStride + k2];
+        const Scalar* b2 = &rhs[(j2+2)*rhsStride + k2];
+        const Scalar* b3 = &rhs[(j2+3)*rhsStride + k2];
+        for(int k=0; k<actual_kc; k++)
+        {
+          ei_pstore(&blockB[count+0*PacketSize], ei_pset1(b0[k]));
+          ei_pstore(&blockB[count+1*PacketSize], ei_pset1(b1[k]));
+          if (nr==4)
+          {
+            ei_pstore(&blockB[count+2*PacketSize], ei_pset1(b2[k]));
+            ei_pstore(&blockB[count+3*PacketSize], ei_pset1(b3[k]));
+          }
+          count += nr*PacketSize;
+        }
+      }
+    }
+
+    // => GEPP_VAR1
+    for(int i2=0; i2<rows; i2+=mc)
+    {
+      const int actual_mc = std::min(i2+mc,rows)-i2;
+
+      // We have selected a mc x kc block of lhs
+      // Let's pack it in a clever order for further purely sequential access
+      int count = 0;
+      const int peeled_mc = (actual_mc/mr)*mr;
+      if (lhsRowMajor)
+      {
+        for(int i=0; i<peeled_mc; i+=mr)
+          for(int k=0; k<actual_kc; k++)
+            for(int w=0; w<mr; w++)
+              blockA[count++] = lhs[(k2+k) + (i2+i+w)*lhsStride];
+        for(int i=peeled_mc; i<actual_mc; i++)
+        {
+          const Scalar* llhs = &lhs[(k2) + (i2+i)*lhsStride];
+          for(int k=0; k<actual_kc; k++)
+            blockA[count++] = llhs[k];
+        }
+      }
+      else
+      {
+        for(int i=0; i<peeled_mc; i+=mr)
+          for(int k=0; k<actual_kc; k++)
+            for(int w=0; w<mr; w++)
+              blockA[count++] = lhs[(k2+k)*lhsStride + i2+i+w];
+        for(int i=peeled_mc; i<actual_mc; i++)
+          for(int k=0; k<actual_kc; k++)
+            blockA[count++] = lhs[(k2+k)*lhsStride + i2+i];
+      }
+
+      // GEBP
+      // loops on each cache friendly block of the result/rhs
+      for(int j2=0; j2<packet_cols; j2+=nr)
+      {
+        // loops on each register blocking of lhs/res
+        const int peeled_mc = (actual_mc/mr)*mr;
+        for(int i=0; i<peeled_mc; i+=mr)
+        {
+          const Scalar* blA = &blockA[i*actual_kc];
+          #ifdef EIGEN_VECTORIZE_SSE
+          _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+          #endif
+
+          // TODO move the res loads to the stores
+
+          // loads the res block into registers
+          PacketType C0, C1, C2, C3, C4, C5, C6, C7;
+                    C0 = ei_ploadu(&res[(j2+0)*resStride + i2 + i]);
+                    C1 = ei_ploadu(&res[(j2+1)*resStride + i2 + i]);
+          if(nr==4) C2 = ei_ploadu(&res[(j2+2)*resStride + i2 + i]);
+          if(nr==4) C3 = ei_ploadu(&res[(j2+3)*resStride + i2 + i]);
+                    C4 = ei_ploadu(&res[(j2+0)*resStride + i2 + i + PacketSize]);
+                    C5 = ei_ploadu(&res[(j2+1)*resStride + i2 + i + PacketSize]);
+          if(nr==4) C6 = ei_ploadu(&res[(j2+2)*resStride + i2 + i + PacketSize]);
+          if(nr==4) C7 = ei_ploadu(&res[(j2+3)*resStride + i2 + i + PacketSize]);
+
+          // performs "inner" product
+          // TODO let's check whether the following peeled loop could be
+          //      optimized via optimal prefetching from one loop to the other
+          const Scalar* blB = &blockB[j2*actual_kc*PacketSize];
+          const int peeled_kc = (actual_kc/4)*4;
+          for(int k=0; k<peeled_kc; k+=4)
+          {
+            PacketType B0, B1, B2, B3, A0, A1;
+
+                      A0 = ei_pload(&blA[0*PacketSize]);
+                      A1 = ei_pload(&blA[1*PacketSize]);
+                      B0 = ei_pload(&blB[0*PacketSize]);
+                      B1 = ei_pload(&blB[1*PacketSize]);
+                      C0 = ei_pmadd(B0, A0, C0);
+            if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
+                      C4 = ei_pmadd(B0, A1, C4);
+            if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
+                      B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
+                      C1 = ei_pmadd(B1, A0, C1);
+                      C5 = ei_pmadd(B1, A1, C5);
+                      B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
+            if(nr==4) C2 = ei_pmadd(B2, A0, C2);
+            if(nr==4) C6 = ei_pmadd(B2, A1, C6);
+            if(nr==4) B2 = ei_pload(&blB[6*PacketSize]);
+            if(nr==4) C3 = ei_pmadd(B3, A0, C3);
+                      A0 = ei_pload(&blA[2*PacketSize]);
+            if(nr==4) C7 = ei_pmadd(B3, A1, C7);
+                      A1 = ei_pload(&blA[3*PacketSize]);
+            if(nr==4) B3 = ei_pload(&blB[7*PacketSize]);
+                      C0 = ei_pmadd(B0, A0, C0);
+                      C4 = ei_pmadd(B0, A1, C4);
+                      B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
+                      C1 = ei_pmadd(B1, A0, C1);
+                      C5 = ei_pmadd(B1, A1, C5);
+                      B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
+            if(nr==4) C2 = ei_pmadd(B2, A0, C2);
+            if(nr==4) C6 = ei_pmadd(B2, A1, C6);
+            if(nr==4) B2 = ei_pload(&blB[10*PacketSize]);
+            if(nr==4) C3 = ei_pmadd(B3, A0, C3);
+                      A0 = ei_pload(&blA[4*PacketSize]);
+            if(nr==4) C7 = ei_pmadd(B3, A1, C7);
+                      A1 = ei_pload(&blA[5*PacketSize]);
+            if(nr==4) B3 = ei_pload(&blB[11*PacketSize]);
+
+                      C0 = ei_pmadd(B0, A0, C0);
+                      C4 = ei_pmadd(B0, A1, C4);
+                      B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
+                      C1 = ei_pmadd(B1, A0, C1);
+                      C5 = ei_pmadd(B1, A1, C5);
+                      B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
+            if(nr==4) C2 = ei_pmadd(B2, A0, C2);
+            if(nr==4) C6 = ei_pmadd(B2, A1, C6);
+            if(nr==4) B2 = ei_pload(&blB[14*PacketSize]);
+            if(nr==4) C3 = ei_pmadd(B3, A0, C3);
+                      A0 = ei_pload(&blA[6*PacketSize]);
+            if(nr==4) C7 = ei_pmadd(B3, A1, C7);
+                      A1 = ei_pload(&blA[7*PacketSize]);
+            if(nr==4) B3 = ei_pload(&blB[15*PacketSize]);
+                      C0 = ei_pmadd(B0, A0, C0);
+                      C4 = ei_pmadd(B0, A1, C4);
+                      C1 = ei_pmadd(B1, A0, C1);
+                      C5 = ei_pmadd(B1, A1, C5);
+            if(nr==4) C2 = ei_pmadd(B2, A0, C2);
+            if(nr==4) C6 = ei_pmadd(B2, A1, C6);
+            if(nr==4) C3 = ei_pmadd(B3, A0, C3);
+            if(nr==4) C7 = ei_pmadd(B3, A1, C7);
+
+            blB += 4*nr*PacketSize;
+            blA += 4*mr;
+          }
+          // process the remaining iterations not covered by the peeled loop
+          for(int k=peeled_kc; k<actual_kc; k++)
+          {
+            PacketType B0, B1, B2, B3, A0, A1;
+
+                      A0 = ei_pload(&blA[0*PacketSize]);
+                      A1 = ei_pload(&blA[1*PacketSize]);
+                      B0 = ei_pload(&blB[0*PacketSize]);
+                      B1 = ei_pload(&blB[1*PacketSize]);
+                      C0 = ei_pmadd(B0, A0, C0);
+            if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
+                      C4 = ei_pmadd(B0, A1, C4);
+            if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
+                      C1 = ei_pmadd(B1, A0, C1);
+                      C5 = ei_pmadd(B1, A1, C5);
+            if(nr==4) C2 = ei_pmadd(B2, A0, C2);
+            if(nr==4) C6 = ei_pmadd(B2, A1, C6);
+            if(nr==4) C3 = ei_pmadd(B3, A0, C3);
+            if(nr==4) C7 = ei_pmadd(B3, A1, C7);
+
+            blB += nr*PacketSize;
+            blA += mr;
+          }
+
+                    ei_pstoreu(&res[(j2+0)*resStride + i2 + i], C0);
+                    ei_pstoreu(&res[(j2+1)*resStride + i2 + i], C1);
+          if(nr==4) ei_pstoreu(&res[(j2+2)*resStride + i2 + i], C2);
+          if(nr==4) ei_pstoreu(&res[(j2+3)*resStride + i2 + i], C3);
+                    ei_pstoreu(&res[(j2+0)*resStride + i2 + i + PacketSize], C4);
+                    ei_pstoreu(&res[(j2+1)*resStride + i2 + i + PacketSize], C5);
+          if(nr==4) ei_pstoreu(&res[(j2+2)*resStride + i2 + i + PacketSize], C6);
+          if(nr==4) ei_pstoreu(&res[(j2+3)*resStride + i2 + i + PacketSize], C7);
+        }
+        for(int i=peeled_mc; i<actual_mc; i++)
+        {
+          const Scalar* blA = &blockA[i*actual_kc];
+          #ifdef EIGEN_VECTORIZE_SSE
+          _mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
+          #endif
+
+          // gets a 1 x nr res block as registers
+          Scalar C0(0), C1(0), C2(0), C3(0);
+          const Scalar* blB = &blockB[j2*actual_kc*PacketSize];
+          for(int k=0; k<actual_kc; k++)
+          {
+            Scalar B0, B1, B2, B3, A0;
+
+                      A0 =  blA[k];
+                      B0 =  blB[0*PacketSize];
+                      B1 =  blB[1*PacketSize];
+                      C0 += B0 * A0;
+            if(nr==4) B2 =  blB[2*PacketSize];
+            if(nr==4) B3 =  blB[3*PacketSize];
+                      C1 += B1 * A0;
+            if(nr==4) C2 += B2 * A0;
+            if(nr==4) C3 += B3 * A0;
+
+            blB += nr*PacketSize;
+          }
+          res[(j2+0)*resStride + i2 + i] += C0;
+          res[(j2+1)*resStride + i2 + i] += C1;
+          if(nr==4) res[(j2+2)*resStride + i2 + i] += C2;
+          if(nr==4) res[(j2+3)*resStride + i2 + i] += C3;
+        }
+      }
+      // remaining rhs/res columns (<nr)
+      for(int j2=packet_cols; j2<cols; j2++)
+      {
+        for(int i=0; i<actual_mc; i++)
+        {
+          Scalar c0 = res[(j2)*resStride + i2+i];
+          if (lhsRowMajor)
+            for(int k=0; k<actual_kc; k++)
+              c0 += lhs[(k2+k)+(i2+i)*lhsStride] * rhs[j2*rhsStride + k2 + k];
+          else
+            for(int k=0; k<actual_kc; k++)
+              c0 += lhs[(k2+k)*lhsStride + i2+i] * rhs[j2*rhsStride + k2 + k];
+          res[(j2)*resStride + i2+i] = c0;
+        }
+      }
+    }
+  }
+
+  ei_aligned_stack_delete(Scalar, blockA, kc*mc);
+  ei_aligned_stack_delete(Scalar, blockB, kc*cols*PacketSize);
+
+#else // alternate product from cylmor
+
+  enum {
+    PacketSize = sizeof(PacketType)/sizeof(Scalar),
+    #if (defined __i386__)
+    // i386 architecture provides only 8 xmm registers,
+    // so let's reduce the max number of rows processed at once.
+    MaxBlockRows = 4,
+    MaxBlockRows_ClampingMask = 0xFFFFFC,
+    #else
+    MaxBlockRows = 8,
+    MaxBlockRows_ClampingMask = 0xFFFFF8,
+    #endif
+    // maximal size of the blocks fitted in L2 cache
+    MaxL2BlockSize = ei_L2_block_traits<EIGEN_TUNE_FOR_CPU_CACHE_SIZE,Scalar>::width
+  };
+
+  const bool resIsAligned = (PacketSize==1) || (((resStride%PacketSize) == 0) && (size_t(res)%16==0));
+
+  const int remainingSize = depth % PacketSize;
+  const int size = depth - remainingSize; // third dimension of the product clamped to packet boundaries
+
+  const int l2BlockRows = MaxL2BlockSize > rows ? rows : 512;
+  const int l2BlockCols = MaxL2BlockSize > cols ? cols : 128;
+  const int l2BlockSize = MaxL2BlockSize > size ? size : 256;
+  const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize;
+  const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
+
+  Scalar* EIGEN_RESTRICT block = new Scalar[l2BlockRows*size];
+//   for(int i=0; i<l2BlockRows*l2BlockSize; ++i)
+//     block[i] = 0;
+  // loops on each L2 cache friendly block of lhs
+  for(int l2k=0; l2k<depth; l2k+=l2BlockSize)
+  {
+    for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
+    {
+      // We have selected a block of lhs
+      // Packs this block into 'block'
+      int count = 0;
+      for(int k=0; k<l2BlockSize; k+=MaxBlockRows)
+      {
+        for(int i=0; i<l2BlockRows; i+=2*PacketSize)
+          for (int w=0; w<MaxBlockRows; ++w)
+            for (int y=0; y<2*PacketSize; ++y)
+              block[count++] = lhs[(k+l2k+w)*lhsStride + l2i+i+ y];
+      }
+
+      // loops on each L2 cache friendly block of the result/rhs
+      for(int l2j=0; l2j<cols; l2j+=l2BlockCols)
+      {
+        for(int k=0; k<l2BlockSize; k+=MaxBlockRows)
+        {
+          for(int j=0; j<l2BlockCols; ++j)
+          {
+            PacketType A0, A1, A2, A3, A4, A5, A6, A7;
+
+            // Load the packets from rhs and reorder them
+
+            // Here we need some vector reordering
+            // Right now it's hardcoded to packets of 4 elements
+            const Scalar* lrhs = &rhs[(j+l2j)*rhsStride+(k+l2k)];
+            A0 = ei_pset1(lrhs[0]);
+            A1 = ei_pset1(lrhs[1]);
+            A2 = ei_pset1(lrhs[2]);
+            A3 = ei_pset1(lrhs[3]);
+            if (MaxBlockRows==8)
+            {
+              A4 = ei_pset1(lrhs[4]);
+              A5 = ei_pset1(lrhs[5]);
+              A6 = ei_pset1(lrhs[6]);
+              A7 = ei_pset1(lrhs[7]);
+            }
+
+            Scalar * lb = &block[l2BlockRows * k];
+            for(int i=0; i<l2BlockRows; i+=2*PacketSize)
+            {
+              PacketType R0, R1, L0, L1, T0, T1;
+
+              // We perform "cross products" of vectors to avoid
+              // reductions (horizontal ops) afterwards
+              T0 = ei_pload(&res[(j+l2j)*resStride+l2i+i]);
+              T1 = ei_pload(&res[(j+l2j)*resStride+l2i+i+PacketSize]);
+
+              R0 = ei_pload(&lb[0*PacketSize]);
+              L0 = ei_pload(&lb[1*PacketSize]);
+              R1 = ei_pload(&lb[2*PacketSize]);
+              L1 = ei_pload(&lb[3*PacketSize]);
+              T0 = ei_pmadd(R0, A0, T0);
+              T1 = ei_pmadd(L0, A0, T1);
+              R0 = ei_pload(&lb[4*PacketSize]);
+              L0 = ei_pload(&lb[5*PacketSize]);
+              T0 = ei_pmadd(R1, A1, T0);
+              T1 = ei_pmadd(L1, A1, T1);
+              R1 = ei_pload(&lb[6*PacketSize]);
+              L1 = ei_pload(&lb[7*PacketSize]);
+              T0 = ei_pmadd(R0, A2, T0);
+              T1 = ei_pmadd(L0, A2, T1);
+              if(MaxBlockRows==8)
+              {
+                R0 = ei_pload(&lb[8*PacketSize]);
+                L0 = ei_pload(&lb[9*PacketSize]);
+              }
+              T0 = ei_pmadd(R1, A3, T0);
+              T1 = ei_pmadd(L1, A3, T1);
+              if(MaxBlockRows==8)
+              {
+                R1 = ei_pload(&lb[10*PacketSize]);
+                L1 = ei_pload(&lb[11*PacketSize]);
+                T0 = ei_pmadd(R0, A4, T0);
+                T1 = ei_pmadd(L0, A4, T1);
+                R0 = ei_pload(&lb[12*PacketSize]);
+                L0 = ei_pload(&lb[13*PacketSize]);
+                T0 = ei_pmadd(R1, A5, T0);
+                T1 = ei_pmadd(L1, A5, T1);
+                R1 = ei_pload(&lb[14*PacketSize]);
+                L1 = ei_pload(&lb[15*PacketSize]);
+                T0 = ei_pmadd(R0, A6, T0);
+                T1 = ei_pmadd(L0, A6, T1);
+                T0 = ei_pmadd(R1, A7, T0);
+                T1 = ei_pmadd(L1, A7, T1);
+              }
+              lb += MaxBlockRows*2*PacketSize;
+
+              ei_pstore(&res[(j+l2j)*resStride+l2i+i], T0);
+              ei_pstore(&res[(j+l2j)*resStride+l2i+i+PacketSize], T1);
+            }
+          }
+        }
+      }
+    }
+  }
+  delete[] block;
+#endif
+
+
+}
+
+#endif // EIGEN_EXTERN_INSTANTIATIONS
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_H
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -22,327 +22,8 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

-#ifndef EIGEN_CACHE_FRIENDLY_PRODUCT_H
-#define EIGEN_CACHE_FRIENDLY_PRODUCT_H
-
-template <int L2MemorySize,typename Scalar>
-struct ei_L2_block_traits {
-  enum {width = 8 * ei_meta_sqrt<L2MemorySize/(64*sizeof(Scalar))>::ret };
-};
-
-#ifndef EIGEN_EXTERN_INSTANTIATIONS
-
-template<typename Scalar>
-static void ei_cache_friendly_product(
-  int _rows, int _cols, int depth,
-  bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride,
-  bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride,
-  bool resRowMajor, Scalar* res, int resStride)
-{
-  const Scalar* EIGEN_RESTRICT lhs;
-  const Scalar* EIGEN_RESTRICT rhs;
-  int lhsStride, rhsStride, rows, cols;
-  bool lhsRowMajor;
-
-  if (resRowMajor)
-  {
-    lhs = _rhs;
-    rhs = _lhs;
-    lhsStride = _rhsStride;
-    rhsStride = _lhsStride;
-    cols = _rows;
-    rows = _cols;
-    lhsRowMajor = !_rhsRowMajor;
-    ei_assert(_lhsRowMajor);
-  }
-  else
-  {
-    lhs = _lhs;
-    rhs = _rhs;
-    lhsStride = _lhsStride;
-    rhsStride = _rhsStride;
-    rows = _rows;
-    cols = _cols;
-    lhsRowMajor = _lhsRowMajor;
-    ei_assert(!_rhsRowMajor);
-  }
-
-  typedef typename ei_packet_traits<Scalar>::type PacketType;
-
-  enum {
-    PacketSize = sizeof(PacketType)/sizeof(Scalar),
-    #if (defined __i386__)
-    // i386 architecture provides only 8 xmm registers,
-    // so let's reduce the max number of rows processed at once.
-    MaxBlockRows = 4,
-    MaxBlockRows_ClampingMask = 0xFFFFFC,
-    #else
-    MaxBlockRows = 8,
-    MaxBlockRows_ClampingMask = 0xFFFFF8,
-    #endif
-    // maximal size of the blocks fitted in L2 cache
-    MaxL2BlockSize = ei_L2_block_traits<EIGEN_TUNE_FOR_L2_CACHE_SIZE,Scalar>::width
-  };
-
-  const bool resIsAligned = (PacketSize==1) || (((resStride%PacketSize) == 0) && (size_t(res)%16==0));
-
-  const int remainingSize = depth % PacketSize;
-  const int size = depth - remainingSize; // third dimension of the product clamped to packet boundaries
-  const int l2BlockRows = MaxL2BlockSize > rows ? rows : MaxL2BlockSize;
-  const int l2BlockCols = MaxL2BlockSize > cols ? cols : MaxL2BlockSize;
-  const int l2BlockSize = MaxL2BlockSize > size ? size : MaxL2BlockSize;
-  const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize;
-  const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
-  Scalar* EIGEN_RESTRICT block = 0;
-  const int allocBlockSize = l2BlockRows*size;
-  block = ei_alloc_stack(Scalar, allocBlockSize);
-  Scalar* EIGEN_RESTRICT rhsCopy
-    = ei_alloc_stack(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
-
-  // loops on each L2 cache friendly blocks of the result
-  for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
-  {
-    const int l2blockRowEnd = std::min(l2i+l2BlockRows, rows);
-    const int l2blockRowEndBW = l2blockRowEnd & MaxBlockRows_ClampingMask;    // end of the rows aligned to bw
-    const int l2blockRemainingRows = l2blockRowEnd - l2blockRowEndBW;         // number of remaining rows
-    //const int l2blockRowEndBWPlusOne = l2blockRowEndBW + (l2blockRemainingRows?0:MaxBlockRows);
-
-    // build a cache friendly blocky matrix
-    int count = 0;
-
-    // copy l2blocksize rows of m_lhs to blocks of ps x bw
-    for(int l2k=0; l2k<size; l2k+=l2BlockSize)
-    {
-      const int l2blockSizeEnd = std::min(l2k+l2BlockSize, size);
-
-      for (int i = l2i; i<l2blockRowEndBW/*PlusOne*/; i+=MaxBlockRows)
-      {
-        // TODO merge the "if l2blockRemainingRows" using something like:
-        // const int blockRows = std::min(i+MaxBlockRows, rows) - i;
-
-        for (int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
-        {
-          // TODO write these loops using meta unrolling
-          // negligible for large matrices but useful for small ones
-          if (lhsRowMajor)
-          {
-            for (int w=0; w<MaxBlockRows; ++w)
-              for (int s=0; s<PacketSize; ++s)
-                block[count++] = lhs[(i+w)*lhsStride + (k+s)];
-          }
-          else
-          {
-            for (int w=0; w<MaxBlockRows; ++w)
-              for (int s=0; s<PacketSize; ++s)
-                block[count++] = lhs[(i+w) + (k+s)*lhsStride];
-          }
-        }
-      }
-      if (l2blockRemainingRows>0)
-      {
-        for (int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
-        {
-          if (lhsRowMajor)
-          {
-            for (int w=0; w<l2blockRemainingRows; ++w)
-              for (int s=0; s<PacketSize; ++s)
-                block[count++] = lhs[(l2blockRowEndBW+w)*lhsStride + (k+s)];
-          }
-          else
-          {
-            for (int w=0; w<l2blockRemainingRows; ++w)
-              for (int s=0; s<PacketSize; ++s)
-                block[count++] = lhs[(l2blockRowEndBW+w) + (k+s)*lhsStride];
-          }
-        }
-      }
-    }
-
-    for(int l2j=0; l2j<cols; l2j+=l2BlockCols)
-    {
-      int l2blockColEnd = std::min(l2j+l2BlockCols, cols);
-
-      for(int l2k=0; l2k<size; l2k+=l2BlockSize)
-      {
-        // acumulate bw rows of lhs time a single column of rhs to a bw x 1 block of res
-        int l2blockSizeEnd = std::min(l2k+l2BlockSize, size);
-
-        // if not aligned, copy the rhs block
-        if (needRhsCopy)
-          for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
-          {
-            ei_internal_assert(l2BlockSizeAligned*(l1j-l2j)+(l2blockSizeEnd-l2k) < l2BlockSizeAligned*l2BlockSizeAligned);
-            memcpy(rhsCopy+l2BlockSizeAligned*(l1j-l2j),&(rhs[l1j*rhsStride+l2k]),(l2blockSizeEnd-l2k)*sizeof(Scalar));
-          }
-
-        // for each bw x 1 result's block
-        for(int l1i=l2i; l1i<l2blockRowEndBW; l1i+=MaxBlockRows)
-        {
-          int offsetblock = l2k * (l2blockRowEnd-l2i) + (l1i-l2i)*(l2blockSizeEnd-l2k) - l2k*MaxBlockRows;
-          const Scalar* EIGEN_RESTRICT localB = &block[offsetblock];
-          
-          for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
-          {
-            const Scalar* EIGEN_RESTRICT rhsColumn;
-            if (needRhsCopy)
-              rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
-            else
-              rhsColumn = &(rhs[l1j*rhsStride]);
-
-            PacketType dst[MaxBlockRows];
-            dst[3] = dst[2] = dst[1] = dst[0] = ei_pset1(Scalar(0.));
-            if (MaxBlockRows==8)
-              dst[7] = dst[6] = dst[5] = dst[4] = dst[0];
-
-            PacketType tmp;
-
-            for(int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
-            {
-              tmp = ei_ploadu(&rhsColumn[k]);
-              PacketType A0, A1, A2, A3, A4, A5;
-              A0 = ei_pload(localB + k*MaxBlockRows);
-              A1 = ei_pload(localB + k*MaxBlockRows+1*PacketSize);
-              A2 = ei_pload(localB + k*MaxBlockRows+2*PacketSize);
-              A3 = ei_pload(localB + k*MaxBlockRows+3*PacketSize);
-              if (MaxBlockRows==8) A4 = ei_pload(localB + k*MaxBlockRows+4*PacketSize);
-              if (MaxBlockRows==8) A5 = ei_pload(localB + k*MaxBlockRows+5*PacketSize);
-              dst[0] = ei_pmadd(tmp, A0, dst[0]);
-              if (MaxBlockRows==8) A0 = ei_pload(localB + k*MaxBlockRows+6*PacketSize);
-              dst[1] = ei_pmadd(tmp, A1, dst[1]);
-              if (MaxBlockRows==8) A1 = ei_pload(localB + k*MaxBlockRows+7*PacketSize);
-              dst[2] = ei_pmadd(tmp, A2, dst[2]);
-              dst[3] = ei_pmadd(tmp, A3, dst[3]);
-              if (MaxBlockRows==8)
-              {
-                dst[4] = ei_pmadd(tmp, A4, dst[4]);
-                dst[5] = ei_pmadd(tmp, A5, dst[5]);
-                dst[6] = ei_pmadd(tmp, A0, dst[6]);
-                dst[7] = ei_pmadd(tmp, A1, dst[7]);
-              }
-            }
-
-            Scalar* EIGEN_RESTRICT localRes = &(res[l1i + l1j*resStride]);
-
-            if (PacketSize>1 && resIsAligned)
-            {
-              // the result is aligned: let's do packet reduction
-              ei_pstore(&(localRes[0]), ei_padd(ei_pload(&(localRes[0])), ei_preduxp(&dst[0])));
-              if (PacketSize==2)
-                ei_pstore(&(localRes[2]), ei_padd(ei_pload(&(localRes[2])), ei_preduxp(&(dst[2]))));
-              if (MaxBlockRows==8)
-              {
-                ei_pstore(&(localRes[4]), ei_padd(ei_pload(&(localRes[4])), ei_preduxp(&(dst[4]))));
-                if (PacketSize==2)
-                  ei_pstore(&(localRes[6]), ei_padd(ei_pload(&(localRes[6])), ei_preduxp(&(dst[6]))));
-              }
-            }
-            else
-            {
-              // not aligned => per coeff packet reduction
-              localRes[0] += ei_predux(dst[0]);
-              localRes[1] += ei_predux(dst[1]);
-              localRes[2] += ei_predux(dst[2]);
-              localRes[3] += ei_predux(dst[3]);
-              if (MaxBlockRows==8)
-              {
-                localRes[4] += ei_predux(dst[4]);
-                localRes[5] += ei_predux(dst[5]);
-                localRes[6] += ei_predux(dst[6]);
-                localRes[7] += ei_predux(dst[7]);
-              }
-            }
-          }
-        }
-        if (l2blockRemainingRows>0)
-        {
-          int offsetblock = l2k * (l2blockRowEnd-l2i) + (l2blockRowEndBW-l2i)*(l2blockSizeEnd-l2k) - l2k*l2blockRemainingRows;
-          const Scalar* localB = &block[offsetblock];
-
-          for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
-          {
-            const Scalar* EIGEN_RESTRICT rhsColumn;
-            if (needRhsCopy)
-              rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
-            else
-              rhsColumn = &(rhs[l1j*rhsStride]);
-
-            PacketType dst[MaxBlockRows];
-            dst[3] = dst[2] = dst[1] = dst[0] = ei_pset1(Scalar(0.));
-            if (MaxBlockRows==8)
-              dst[7] = dst[6] = dst[5] = dst[4] = dst[0];
-
-            // let's declare a few other temporary registers
-            PacketType tmp;
-
-            for(int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
-            {
-              tmp = ei_pload(&rhsColumn[k]);
-
-                                           dst[0] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows             ])), dst[0]);
-              if (l2blockRemainingRows>=2) dst[1] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+  PacketSize])), dst[1]);
-              if (l2blockRemainingRows>=3) dst[2] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+2*PacketSize])), dst[2]);
-              if (l2blockRemainingRows>=4) dst[3] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+3*PacketSize])), dst[3]);
-              if (MaxBlockRows==8)
-              {
-                if (l2blockRemainingRows>=5) dst[4] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+4*PacketSize])), dst[4]);
-                if (l2blockRemainingRows>=6) dst[5] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+5*PacketSize])), dst[5]);
-                if (l2blockRemainingRows>=7) dst[6] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+6*PacketSize])), dst[6]);
-                if (l2blockRemainingRows>=8) dst[7] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+7*PacketSize])), dst[7]);
-              }
-            }
-
-            Scalar* EIGEN_RESTRICT localRes = &(res[l2blockRowEndBW + l1j*resStride]);
-
-            // process the remaining rows once at a time
-                                         localRes[0] += ei_predux(dst[0]);
-            if (l2blockRemainingRows>=2) localRes[1] += ei_predux(dst[1]);
-            if (l2blockRemainingRows>=3) localRes[2] += ei_predux(dst[2]);
-            if (l2blockRemainingRows>=4) localRes[3] += ei_predux(dst[3]);
-            if (MaxBlockRows==8)
-            {
-              if (l2blockRemainingRows>=5) localRes[4] += ei_predux(dst[4]);
-              if (l2blockRemainingRows>=6) localRes[5] += ei_predux(dst[5]);
-              if (l2blockRemainingRows>=7) localRes[6] += ei_predux(dst[6]);
-              if (l2blockRemainingRows>=8) localRes[7] += ei_predux(dst[7]);
-            }
-
-          }
-        }
-      }
-    }
-  }
-  if (PacketSize>1 && remainingSize)
-  {
-    if (lhsRowMajor)
-    {
-      for (int j=0; j<cols; ++j)
-        for (int i=0; i<rows; ++i)
-        {
-          Scalar tmp = lhs[i*lhsStride+size] * rhs[j*rhsStride+size];
-          // FIXME this loop get vectorized by the compiler !
-          for (int k=1; k<remainingSize; ++k)
-            tmp += lhs[i*lhsStride+size+k] * rhs[j*rhsStride+size+k];
-          res[i+j*resStride] += tmp;
-        }
-    }
-    else
-    {
-      for (int j=0; j<cols; ++j)
-        for (int i=0; i<rows; ++i)
-        {
-          Scalar tmp = lhs[i+size*lhsStride] * rhs[j*rhsStride+size];
-          for (int k=1; k<remainingSize; ++k)
-            tmp += lhs[i+(size+k)*lhsStride] * rhs[j*rhsStride+size+k];
-          res[i+j*resStride] += tmp;
-        }
-    }
-  }
-
-  ei_free_stack(block, Scalar, allocBlockSize);
-  ei_free_stack(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
-}
-
-#endif // EIGEN_EXTERN_INSTANTIATIONS
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_H

 /* Optimized col-major matrix * vector product:
 * This algorithm processes 4 columns at onces that allows to both reduce
@@ -361,13 +42,14 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
  #ifdef _EIGEN_ACCUMULATE_PACKETS
  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
  #endif
-
-  #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2,OFFSET) \
-    ei_pstore(&res[j OFFSET], \
-      ei_padd(ei_pload(&res[j OFFSET]), \
+  #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \
+    ei_pstore(&res[j], \
+      ei_padd(ei_pload(&res[j]), \
        ei_padd( \
-          ei_padd(ei_pmul(ptmp0,ei_pload ## A0(&lhs0[j OFFSET])),ei_pmul(ptmp1,ei_pload ## A13(&lhs1[j OFFSET]))), \
-          ei_padd(ei_pmul(ptmp2,ei_pload ## A2(&lhs2[j OFFSET])),ei_pmul(ptmp3,ei_pload ## A13(&lhs3[j OFFSET]))) )))
+          ei_padd(ei_pmul(ptmp0,EIGEN_CAT(ei_ploa , A0)(&lhs0[j])), \
+                  ei_pmul(ptmp1,EIGEN_CAT(ei_ploa , A13)(&lhs1[j]))), \
+          ei_padd(ei_pmul(ptmp2,EIGEN_CAT(ei_ploa , A2)(&lhs2[j])), \
+                  ei_pmul(ptmp3,EIGEN_CAT(ei_ploa , A13)(&lhs3[j]))) )))

  typedef typename ei_packet_traits<Scalar>::type Packet;
  const int PacketSize = sizeof(Packet)/sizeof(Scalar);
@@ -397,7 +79,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
  if (PacketSize>1)
  {
    ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
-    
+
    while (skipColumns<PacketSize &&
           alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize))
      ++skipColumns;
@@ -418,7 +100,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(

  int offset1 = (FirstAligned && alignmentStep==1?3:1);
  int offset3 = (FirstAligned && alignmentStep==1?1:3);
-  
+
  int columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
  for (int i=skipColumns; i<columnBound; i+=columnsAtOnce)
  {
@@ -433,7 +115,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
    {
      /* explicit vectorization */
      // process initial unaligned coeffs
-      for (int j=0; j<alignedStart; j++)
+      for (int j=0; j<alignedStart; ++j)
        res[j] += ei_pfirst(ptmp0)*lhs0[j] + ei_pfirst(ptmp1)*lhs1[j] + ei_pfirst(ptmp2)*lhs2[j] + ei_pfirst(ptmp3)*lhs3[j];

      if (alignedSize>alignedStart)
@@ -442,11 +124,11 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
        {
          case AllAligned:
            for (int j = alignedStart; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(,,,);
+              _EIGEN_ACCUMULATE_PACKETS(d,d,d);
            break;
          case EvenAligned:
            for (int j = alignedStart; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(,u,,);
+              _EIGEN_ACCUMULATE_PACKETS(d,du,d);
            break;
          case FirstAligned:
            if(peels>1)
@@ -482,19 +164,19 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
              }
            }
            for (int j = peeledSize; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(,u,u,);
+              _EIGEN_ACCUMULATE_PACKETS(d,du,du);
            break;
          default:
            for (int j = alignedStart; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(u,u,u,);
+              _EIGEN_ACCUMULATE_PACKETS(du,du,du);
            break;
        }
      }
    } // end explicit vectorization

    /* process remaining coeffs (or all if there is no explicit vectorization) */
-    for (int j=alignedSize; j<size; j++)
-      res[j] += ei_pfirst(ptmp0)*lhs0[j] + ei_pfirst(ptmp1)*lhs1[j] + ei_pfirst(ptmp2)*lhs2[j] + ei_pfirst(ptmp3)*lhs3[j];
+    for (int j=alignedSize; j<size; ++j)
+	  res[j] += ei_pfirst(ptmp0)*lhs0[j] + ei_pfirst(ptmp1)*lhs1[j] + ei_pfirst(ptmp2)*lhs2[j] + ei_pfirst(ptmp3)*lhs3[j];
  }

  // process remaining first and last columns (at most columnsAtOnce-1)
@@ -502,7 +184,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
  int start = columnBound;
  do
  {
-    for (int i=start; i<end; i++)
+    for (int i=start; i<end; ++i)
    {
      Packet ptmp0 = ei_pset1(rhs[i]);
      const Scalar* lhs0 = lhs + i*lhsStride;
@@ -511,7 +193,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
      {
        /* explicit vectorization */
        // process first unaligned result's coeffs
-        for (int j=0; j<alignedStart; j++)
+        for (int j=0; j<alignedStart; ++j)
          res[j] += ei_pfirst(ptmp0) * lhs0[j];

        // process aligned result's coeffs
@@ -524,7 +206,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
      }

      // process remaining scalars (or all if no explicit vectorization)
-      for (int j=alignedSize; j<size; j++)
+      for (int j=alignedSize; j<size; ++j)
        res[j] += ei_pfirst(ptmp0) * lhs0[j];
    }
    if (skipColumns)
@@ -550,12 +232,12 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
  #endif

-  #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2,OFFSET) {\
+  #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\
    Packet b = ei_pload(&rhs[j]); \
-    ptmp0 = ei_pmadd(b, ei_pload##A0 (&lhs0[j]), ptmp0); \
-    ptmp1 = ei_pmadd(b, ei_pload##A13(&lhs1[j]), ptmp1); \
-    ptmp2 = ei_pmadd(b, ei_pload##A2 (&lhs2[j]), ptmp2); \
-    ptmp3 = ei_pmadd(b, ei_pload##A13(&lhs3[j]), ptmp3); }
+    ptmp0 = ei_pmadd(b, EIGEN_CAT(ei_ploa,A0) (&lhs0[j]), ptmp0); \
+    ptmp1 = ei_pmadd(b, EIGEN_CAT(ei_ploa,A13)(&lhs1[j]), ptmp1); \
+    ptmp2 = ei_pmadd(b, EIGEN_CAT(ei_ploa,A2) (&lhs2[j]), ptmp2); \
+    ptmp3 = ei_pmadd(b, EIGEN_CAT(ei_ploa,A13)(&lhs3[j]), ptmp3); }

  typedef typename ei_packet_traits<Scalar>::type Packet;
  const int PacketSize = sizeof(Packet)/sizeof(Scalar);
@@ -580,13 +262,13 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(

  // we cannot assume the first element is aligned because of sub-matrices
  const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
-  
+
  // find how many rows do we have to skip to be aligned with rhs (if possible)
  int skipRows = 0;
  if (PacketSize>1)
  {
    ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0  || size<PacketSize);
-    
+
    while (skipRows<PacketSize &&
           alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%PacketSize))
      ++skipRows;
@@ -607,7 +289,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(

  int offset1 = (FirstAligned && alignmentStep==1?3:1);
  int offset3 = (FirstAligned && alignmentStep==1?1:3);
-  
+
  int rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
  for (int i=skipRows; i<rowBound; i+=rowsAtOnce)
  {
@@ -621,10 +303,10 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
    {
      /* explicit vectorization */
      Packet ptmp0 = ei_pset1(Scalar(0)), ptmp1 = ei_pset1(Scalar(0)), ptmp2 = ei_pset1(Scalar(0)), ptmp3 = ei_pset1(Scalar(0));
-      
+
      // process initial unaligned coeffs
      // FIXME this loop get vectorized by the compiler !
-      for (int j=0; j<alignedStart; j++)
+      for (int j=0; j<alignedStart; ++j)
      {
        Scalar b = rhs[j];
        tmp0 += b*lhs0[j]; tmp1 += b*lhs1[j]; tmp2 += b*lhs2[j]; tmp3 += b*lhs3[j];
@@ -636,11 +318,11 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
        {
          case AllAligned:
            for (int j = alignedStart; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(,,,);
+              _EIGEN_ACCUMULATE_PACKETS(d,d,d);
            break;
          case EvenAligned:
            for (int j = alignedStart; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(,u,,);
+              _EIGEN_ACCUMULATE_PACKETS(d,du,d);
            break;
          case FirstAligned:
            if (peels>1)
@@ -679,11 +361,11 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
              }
            }
            for (int j = peeledSize; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(,u,u,);
+              _EIGEN_ACCUMULATE_PACKETS(d,du,du);
            break;
          default:
            for (int j = alignedStart; j<alignedSize; j+=PacketSize)
-              _EIGEN_ACCUMULATE_PACKETS(u,u,u,);
+              _EIGEN_ACCUMULATE_PACKETS(du,du,du);
            break;
        }
        tmp0 += ei_predux(ptmp0);
@@ -695,7 +377,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(

    // process remaining coeffs (or all if no explicit vectorization)
    // FIXME this loop get vectorized by the compiler !
-    for (int j=alignedSize; j<size; j++)
+    for (int j=alignedSize; j<size; ++j)
    {
      Scalar b = rhs[j];
      tmp0 += b*lhs0[j]; tmp1 += b*lhs1[j]; tmp2 += b*lhs2[j]; tmp3 += b*lhs3[j];
@@ -708,14 +390,14 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
  int start = rowBound;
  do
  {
-    for (int i=start; i<end; i++)
+    for (int i=start; i<end; ++i)
    {
      Scalar tmp0 = Scalar(0);
      Packet ptmp0 = ei_pset1(tmp0);
      const Scalar* lhs0 = lhs + i*lhsStride;
      // process first unaligned result's coeffs
      // FIXME this loop get vectorized by the compiler !
-      for (int j=0; j<alignedStart; j++)
+      for (int j=0; j<alignedStart; ++j)
        tmp0 += rhs[j] * lhs0[j];

      if (alignedSize>alignedStart)
@@ -732,7 +414,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(

      // process remaining scalars
      // FIXME this loop get vectorized by the compiler !
-      for (int j=alignedSize; j<size; j++)
+      for (int j=alignedSize; j<size; ++j)
        tmp0 += rhs[j] * lhs0[j];
      res[i] += tmp0;
    }
@@ -749,4 +431,4 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
  #undef _EIGEN_ACCUMULATE_PACKETS
 }

-#endif // EIGEN_CACHE_FRIENDLY_PRODUCT_H
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_H
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -0,0 +1,146 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
+
+template<bool Conjugate> struct ei_conj_if { // functor returning ei_conj(x) by value; the <false> specialization below returns x itself
+  template<typename Scalar> Scalar operator() (const Scalar& x) const { return ei_conj(x); }
+};
+
+template<> struct ei_conj_if<false> { // no conjugation: hands back the very reference it was given
+  template<typename Scalar> Scalar& operator() (Scalar& x) const { return x; }
+};
+
+/* Optimized selfadjoint matrix * vector product: res is zeroed, then the product is accumulated
+ * into it. For each vector j of lhs only the entries on one side of the diagonal are read (which
+ * side depends on FirstTriangular), and each of them contributes to both res[i] and res[j].
+ * Most vectors are processed 2 at once, which halves the number of load/stores of the result and
+ * reduces the instruction dependency; the others go through the scalar loop at the end. */
+template<typename Scalar, int StorageOrder, int UpLo>
+static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
+  int size,                          // number of coefficients of rhs and res, and of vectors of lhs
+  const Scalar* lhs, int lhsStride,  // the j-th vector of lhs starts at lhs + j*lhsStride
+  const Scalar* rhs, //int rhsIncr,
+  Scalar* res)                       // overwritten: set to zero first, then accumulated into
+{
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  const int PacketSize = sizeof(Packet)/sizeof(Scalar);
+
+  enum {
+    IsRowMajor = StorageOrder==RowMajorBit ? 1 : 0,
+    IsLower = UpLo == LowerTriangularBit ? 1 : 0,
+    FirstTriangular = IsRowMajor == IsLower
+  };
+  // conj0 is applied to entries updating res[i], conj1 to entries accumulated for res[j]; for a non complex Scalar both are the identity
+  ei_conj_if<NumTraits<Scalar>::IsComplex && IsRowMajor> conj0;
+  ei_conj_if<NumTraits<Scalar>::IsComplex && !IsRowMajor> conj1;
+
+  for (int i=0;i<size;i++)
+    res[i] = 0;
+  // even number of vectors handled two at a time; the remaining ones (size if size<8, else 8 or 9) are handled one at a time
+  int bound = std::max(0,size-8) & 0xfffffffE;
+  if (FirstTriangular)
+    bound = size - bound; // the paired vectors are then the last ones, so bound becomes their first index
+
+  for (int j=FirstTriangular ? bound : 0;
+       j<(FirstTriangular ? size : bound);j+=2)
+  {
+    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+    register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
+
+    Scalar t0 = rhs[j];
+    Packet ptmp0 = ei_pset1(t0);
+    Scalar t1 = rhs[j+1];
+    Packet ptmp1 = ei_pset1(t1);
+    // t2/ptmp2 and t3/ptmp3 accumulate the contributions added to res[j] and res[j+1] at the end of the iteration
+    Scalar t2 = 0;
+    Packet ptmp2 = ei_pset1(t2);
+    Scalar t3 = 0;
+    Packet ptmp3 = ei_pset1(t3);
+    // [starti,endi) is the off-diagonal index range; [alignedStart,alignedEnd) is the part of it processed by packets
+    size_t starti = FirstTriangular ? 0 : j+2;
+    size_t endi   = FirstTriangular ? j : size;
+    size_t alignedEnd = starti;
+    size_t alignedStart = (starti) + ei_alignmentOffset(&res[starti], endi-starti);
+    alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
+    // the 2x2 block lying on the diagonal (indices j and j+1) is handled explicitly
+    res[j]   += t0 * conj0(A0[j]);
+    if(FirstTriangular)
+    {
+      res[j+1]   += t1 * conj0(A1[j+1]);
+      res[j] += t1 * conj0(A1[j]);
+      t3 += conj1(A1[j]) * rhs[j];
+    }
+    else
+    {
+      res[j+1] += t0 * conj0(A0[j+1]) + t1 * conj0(A1[j+1]);
+      t2 += conj1(A0[j+1]) * rhs[j+1];
+    }
+    // leading scalar part: must apply conj0/conj1 exactly like the packet loop and the trailing loop below
+    for (size_t i=starti; i<alignedStart; ++i)
+    {
+      res[i] += t0 * conj0(A0[i]) + t1 * conj0(A1[i]);
+      t2 += conj1(A0[i]) * rhs[i];
+      t3 += conj1(A1[i]) * rhs[i];
+    }
+    for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
+    {
+      Packet A0i = ei_ploadu(&A0[i]);
+      Packet A1i = ei_ploadu(&A1[i]);
+      Packet Bi = ei_ploadu(&rhs[i]); // FIXME should be aligned in most cases
+      Packet Xi = ei_pload(&res[i]);
+
+      Xi = ei_padd(ei_padd(Xi, ei_pmul(ptmp0, conj0(A0i))), ei_pmul(ptmp1, conj0(A1i)));
+      ptmp2 = ei_padd(ptmp2, ei_pmul(conj1(A0i), Bi));
+      ptmp3 = ei_padd(ptmp3, ei_pmul(conj1(A1i), Bi));
+      ei_pstore(&res[i],Xi);
+    }
+    for (size_t i=alignedEnd; i<endi; i++)
+    {
+      res[i] += t0 * conj0(A0[i]) + t1 * conj0(A1[i]);
+      t2 += conj1(A0[i]) * rhs[i];
+      t3 += conj1(A1[i]) * rhs[i];
+    }
+
+    res[j]   += t2 + ei_predux(ptmp2);
+    res[j+1] += t3 + ei_predux(ptmp3);
+  }
+  for (int j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
+  {
+    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+
+    Scalar t1 = rhs[j];
+    Scalar t2 = 0;
+    res[j] += t1 * conj0(A0[j]);
+    for (int i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) {
+      res[i] += t1 * conj0(A0[i]);
+      t2 += conj1(A0[i]) * rhs[i];
+    }
+    res[j] += t2;
+  }
+}
+
+
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
--- a/Eigen/src/Core/util/CMakeLists.txt
+++ b/Eigen/src/Core/util/CMakeLists.txt
@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Core_util_SRCS "*.h")

 INSTALL(FILES 
  ${Eigen_Core_util_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel
  )
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,7 +26,28 @@
 #ifndef EIGEN_CONSTANTS_H
 #define EIGEN_CONSTANTS_H

-const int Dynamic = 10000;
+/** This value means that a quantity is not known at compile-time, and that instead the value is
+  * stored in some runtime variable.
+  *
+  * Explanation for the choice of this value:
+  * - It should be positive and larger than the number of entries in any reasonable fixed-size matrix.
+  *   This allows to simplify many compile-time conditions throughout Eigen.
+  * - It should be smaller than the sqrt of INT_MAX. Indeed, we often multiply a number of rows with a number
+  *   of columns in order to compute a number of coefficients. Even if we guard that with an "if" checking whether
+  *   the values are Dynamic, we still get a compiler warning "integer overflow". So the only way to get around
+  *   it would be a meta-selector. Doing this everywhere would reduce code readability and lengthen compilation times.
+  *   Also, disabling compiler warnings for integer overflow, sounds like a bad idea.
+  * - It should be a prime number, because for example the old value 10000 led to bugs with 100x100 matrices.
+  *
+  * If you wish to port Eigen to a platform where sizeof(int)==2, it is perfectly possible to set Dynamic to, say, 97.
+  * However, changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
+  */
+const int Dynamic = 33331;
+
+/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
+  * The value Infinity there means the L-infinity norm.
+  */
+const int Infinity = -1;

 /** \defgroup flags flags
  * \ingroup Core_Module
@@ -34,6 +55,10 @@ const int Dynamic = 10000;
  * These are the possible bits which can be OR'ed to constitute the flags of a matrix or
  * expression.
  *
+  * It is important to note that these flags are a purely compile-time notion. They are a compile-time property of
+  * an expression type, implemented as enum's. They are not stored in memory at runtime, and they do not incur any
+  * runtime overhead.
+  *
  * \sa MatrixBase::Flags
  */

@@ -115,7 +140,7 @@ const unsigned int LinearAccessBit = 0x10;
  * First, references to the coefficients must be available through coeffRef(int, int). This rules out read-only
  * expressions whose coefficients are computed on demand by coeff(int, int). Second, the memory layout of the
  * array of coefficients must be exactly the natural one suggested by rows(), cols(), stride(), and the RowMajorBit.
-  * This rules out expressions such as DiagonalCoeffs, whose coefficients, though referencable, do not have
+  * This rules out expressions such as Diagonal, whose coefficients, though referencable, do not have
  * such a regular memory layout.
  */
 const unsigned int DirectAccessBit = 0x20;
@@ -161,24 +186,31 @@ const unsigned int HereditaryBits = RowMajorBit
                                  | EvalBeforeAssigningBit
                                  | SparseBit;

-// Possible values for the Mode parameter of part() and of extract()
-const unsigned int Upper = UpperTriangularBit;
-const unsigned int StrictlyUpper = UpperTriangularBit | ZeroDiagBit;
-const unsigned int Lower = LowerTriangularBit;
-const unsigned int StrictlyLower = LowerTriangularBit | ZeroDiagBit;
+// diagonal means both upper and lower triangular
+const unsigned DiagonalBits = UpperTriangularBit | LowerTriangularBit;
+    
+// Possible values for the Mode parameter of part()
+const unsigned int UpperTriangular = UpperTriangularBit;
+const unsigned int StrictlyUpperTriangular = UpperTriangularBit | ZeroDiagBit;
+const unsigned int LowerTriangular = LowerTriangularBit;
+const unsigned int StrictlyLowerTriangular = LowerTriangularBit | ZeroDiagBit;
 const unsigned int SelfAdjoint = SelfAdjointBit;
+const unsigned int UnitUpperTriangular = UpperTriangularBit | UnitDiagBit;
+const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit;

-// additional possible values for the Mode parameter of extract()
-const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit;
-const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit;
-const unsigned int Diagonal = Upper | Lower;
+template<typename T> struct ei_is_diagonal // compile-time test on the Flags enum of T
+{
+  enum {
+    ret = ( (unsigned int)(T::Flags) & DiagonalBits ) == DiagonalBits // nonzero iff every bit of DiagonalBits is set in T::Flags
+  };
+};

 enum { Aligned, Unaligned };
 enum { ForceAligned, AsRequested };
 enum { ConditionalJumpCost = 5 };
 enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
-enum DirectionType { Vertical, Horizontal };
-enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct, SparseProduct };
+enum DirectionType { Vertical, Horizontal, BothDirections };
+enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct, SparseTimeSparseProduct, SparseTimeDenseProduct, DenseTimeSparseProduct };

 enum {
  /** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
@@ -194,26 +226,45 @@ enum {
 };

 enum {
-  CompleteUnrolling,
+  NoUnrolling,
  InnerUnrolling,
-  NoUnrolling
+  CompleteUnrolling
 };

 enum {
  ColMajor = 0,
-  RowMajor = RowMajorBit
+  RowMajor = 0x1,  // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
+  /** \internal Align the matrix itself if it is vectorizable fixed-size */
+  AutoAlign = 0,
+  /** \internal Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be
+                requested to be aligned) */
+  DontAlign = 0x2
 };

 enum {
  IsDense         = 0,
+  IsSparse        = SparseBit,
  NoDirectAccess  = 0,
-  HasDirectAccess = DirectAccessBit,
-  IsSparse        = SparseBit
+  HasDirectAccess = DirectAccessBit
 };

-const int FullyCoherentAccessPattern  = 0x1;
-const int InnerCoherentAccessPattern  = 0x2 | FullyCoherentAccessPattern;
-const int OuterCoherentAccessPattern  = 0x4 | InnerCoherentAccessPattern;
-const int RandomAccessPattern         = 0x8 | OuterCoherentAccessPattern;
+enum TransformTraits {
+  Isometry      = 0x1,
+  Affine        = 0x2,
+  AffineCompact = 0x10 | Affine,
+  Projective    = 0x20
+};
+
+const int EiArch_Generic = 0x0;
+const int EiArch_SSE     = 0x1;
+const int EiArch_AltiVec = 0x2;
+
+#if defined EIGEN_VECTORIZE_SSE
+  const int EiArch = EiArch_SSE;
+#elif defined EIGEN_VECTORIZE_ALTIVEC
+  const int EiArch = EiArch_AltiVec;
+#else
+  const int EiArch = EiArch_Generic;
+#endif

 #endif // EIGEN_CONSTANTS_H
--- a/Eigen/src/Core/util/DisableMSVCWarnings.h
+++ b/Eigen/src/Core/util/DisableMSVCWarnings.h
@@ -0,0 +1,5 @@
+
+#ifdef _MSC_VER
+  #pragma warning( push )
+  #pragma warning( disable : 4181 4244 4127 4211 4717 )
+#endif
--- a/Eigen/src/Core/util/EnableMSVCWarnings.h
+++ b/Eigen/src/Core/util/EnableMSVCWarnings.h
@@ -0,0 +1,4 @@
+
+#ifdef _MSC_VER
+  #pragma warning( pop )
+#endif
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -28,7 +28,8 @@
 template<typename T> struct ei_traits;
 template<typename T> struct NumTraits;

-template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder = ColMajor,
+template<typename _Scalar, int _Rows, int _Cols,
+         int _Options = EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION | AutoAlign,
         int _MaxRows = _Rows, int _MaxCols = _Cols> class Matrix;

 template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
@@ -44,14 +45,17 @@ template<typename NullaryOp, typename MatrixType>         class CwiseNullaryOp;
 template<typename UnaryOp,   typename MatrixType>         class CwiseUnaryOp;
 template<typename BinaryOp,  typename Lhs, typename Rhs>  class CwiseBinaryOp;
 template<typename Lhs, typename Rhs, int ProductMode> class Product;
-template<typename CoeffsVectorType> class DiagonalMatrix;
-template<typename MatrixType> class DiagonalCoeffs;
+template<typename CoeffsVectorType, typename Derived> class DiagonalMatrixBase;
+template<typename CoeffsVectorType> class DiagonalMatrixWrapper;
+template<typename _Scalar, int _Size> class DiagonalMatrix;
+template<typename MatrixType, int Index> class Diagonal;
 template<typename MatrixType, int PacketAccess = AsRequested> class Map;
 template<typename MatrixType, unsigned int Mode> class Part;
 template<typename MatrixType, unsigned int Mode> class Extract;
 template<typename ExpressionType> class Cwise;
 template<typename ExpressionType> class WithFormat;
 template<typename MatrixType> struct CommaInitializer;
+template<typename Functor, typename EvalType> class ReturnByValue;


 template<typename Lhs, typename Rhs> struct ei_product_mode;
@@ -64,6 +68,7 @@ template<typename Scalar> struct ei_scalar_quotient_op;
 template<typename Scalar> struct ei_scalar_opposite_op;
 template<typename Scalar> struct ei_scalar_conjugate_op;
 template<typename Scalar> struct ei_scalar_real_op;
+template<typename Scalar> struct ei_scalar_imag_op;
 template<typename Scalar> struct ei_scalar_abs_op;
 template<typename Scalar> struct ei_scalar_abs2_op;
 template<typename Scalar> struct ei_scalar_sqrt_op;
@@ -85,6 +90,8 @@ template<typename Scalar> struct ei_scalar_add_op;
 template<typename Scalar> struct ei_scalar_constant_op;
 template<typename Scalar> struct ei_scalar_identity_op;

+template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
+
 struct IOFormat;

 template<typename Scalar>
@@ -98,12 +105,15 @@ void ei_cache_friendly_product(
 template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
 template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
 template<typename ExpressionType, int Direction> class PartialRedux;
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
+template<typename MatrixType, int Direction = BothDirections> class Reverse;

 template<typename MatrixType> class LU;
+template<typename MatrixType> class PartialLU;
 template<typename MatrixType> class QR;
 template<typename MatrixType> class SVD;
-template<typename MatrixType> class Cholesky;
-template<typename MatrixType> class CholeskyWithoutSquareRoot;
+template<typename MatrixType> class LLT;
+template<typename MatrixType> class LDLT;

 // Geometry module:
 template<typename Derived, int _Dim> class RotationBase;
@@ -111,10 +121,14 @@ template<typename Lhs, typename Rhs> class Cross;
 template<typename Scalar> class Quaternion;
 template<typename Scalar> class Rotation2D;
 template<typename Scalar> class AngleAxis;
-template<typename Scalar,int Dim> class Transform;
+template<typename Scalar,int Dim,int Mode=Affine> class Transform;
 template <typename _Scalar, int _AmbientDim> class ParametrizedLine;
 template <typename _Scalar, int _AmbientDim> class Hyperplane;
 template<typename Scalar,int Dim> class Translation;
-template<typename Scalar,int Dim> class Scaling;
+template<typename Scalar> class UniformScaling;
+template<typename MatrixType,int Direction> class Homogeneous;
+
+// Sparse module:
+template<typename Lhs, typename Rhs, int ProductMode> class SparseProduct;

 #endif // EIGEN_FORWARDDECLARATIONS_H
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -28,15 +28,69 @@

 #undef minor

-/** \internal  Defines the maximal loop size to enable meta unrolling of loops */
+#define EIGEN_WORLD_VERSION 2
+#define EIGEN_MAJOR_VERSION 0
+#define EIGEN_MINOR_VERSION 52
+
+#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
+                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
+                                                                 EIGEN_MINOR_VERSION>=z))))
+
+// if the compiler is GNUC, disable 16 byte alignment on exotic archs that probably don't need it, and on which
+// it may be extra trouble to get aligned memory allocation to work (example: on ARM, overloading new[] is a PITA
+// because extra memory must be allocated for bookkeeping).
+// if the compiler is not GNUC, just cross fingers that the architecture isn't too exotic, because we don't want
+// to keep track of all the different preprocessor symbols for all compilers.
+#if (!defined(__GNUC__)) || defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ia64__)
+  #define EIGEN_ARCH_WANTS_ALIGNMENT 1
+#else
+  #define EIGEN_ARCH_WANTS_ALIGNMENT 0
+#endif
+
+// EIGEN_ALIGN is the true test whether we want to align or not. It takes into account both the user choice to explicitly disable
+// alignment (EIGEN_DONT_ALIGN) and the architecture config (EIGEN_ARCH_WANTS_ALIGNMENT). Henceforth, only EIGEN_ALIGN should be used.
+#if EIGEN_ARCH_WANTS_ALIGNMENT && !defined(EIGEN_DONT_ALIGN)
+  #define EIGEN_ALIGN 1
+#else
+  #define EIGEN_ALIGN 0
+  #ifdef EIGEN_VECTORIZE
+    #error Vectorization enabled, but the architecture is not listed among those for which we require 16 byte alignment. If you added vectorization for another architecture, you also need to edit this list.
+  #endif
+  #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+    #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+  #endif
+#endif
+
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor
+#else
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor
+#endif
+
+/** Defines the maximal loop size to enable meta unrolling of loops.
+  * Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
+  * it does not correspond to the number of iterations or the number of instructions
+  */
 #ifndef EIGEN_UNROLLING_LIMIT
 #define EIGEN_UNROLLING_LIMIT 100
 #endif

-/** \internal Define the maximal size in Bytes of L2 blocks.
-  * The current value is set to generate blocks of 256x256 for float */
-#ifndef EIGEN_TUNE_FOR_L2_CACHE_SIZE
-#define EIGEN_TUNE_FOR_L2_CACHE_SIZE (1024*256)
+/** Defines the maximal size in Bytes of blocks fitting in CPU cache.
+  * The current value is set to generate blocks of 256x256 for float
+  *
+  * Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes
+  */
+#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE
+#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
+#endif
+
+/** Allows disabling some optimizations which might affect the accuracy of the result.
+  * Such optimizations are enabled by default; set EIGEN_FAST_MATH to 0 to disable them.
+  * They currently include:
+  *   - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled. 
+  */
+#ifndef EIGEN_FAST_MATH
+#define EIGEN_FAST_MATH 1
 #endif

 #define USING_PART_OF_NAMESPACE_EIGEN \
@@ -70,7 +124,7 @@ using Eigen::ei_cos;
 #endif

 #ifdef EIGEN_INTERNAL_DEBUGGING
-#define ei_internal_assert(x) ei_assert(x);
+#define ei_internal_assert(x) ei_assert(x)
 #else
 #define ei_internal_assert(x)
 #endif
@@ -81,46 +135,110 @@ using Eigen::ei_cos;
 #define EIGEN_ONLY_USED_FOR_DEBUG(x)
 #endif

+// EIGEN_ALWAYS_INLINE_ATTRIB should be used in the declaration of functions
+// which should be inlined even in debug mode.
 // FIXME with the always_inline attribute,
 // gcc 3.4.x reports the following compilation error:
 //   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
 //    : function body not available
 #if EIGEN_GNUC_AT_LEAST(4,0)
-#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#define EIGEN_ALWAYS_INLINE_ATTRIB __attribute__((always_inline))
 #else
-#define EIGEN_ALWAYS_INLINE inline
+#define EIGEN_ALWAYS_INLINE_ATTRIB
+#endif
+
+// EIGEN_STRONG_INLINE means "inline as much as possible"
+#if (defined _MSC_VER)
+#define EIGEN_STRONG_INLINE __forceinline
+#else
+#define EIGEN_STRONG_INLINE inline
 #endif

 #if (defined __GNUC__)
 #define EIGEN_DONT_INLINE __attribute__((noinline))
+#elif (defined _MSC_VER)
+#define EIGEN_DONT_INLINE __declspec(noinline)
 #else
 #define EIGEN_DONT_INLINE
 #endif

 #if (defined __GNUC__)
-#define EIGEN_ALIGN_128 __attribute__ ((aligned(16)))
+#define EIGEN_DEPRECATED __attribute__((deprecated))
+#elif (defined _MSC_VER)
+#define EIGEN_DEPRECATED __declspec(deprecated)
 #else
+#define EIGEN_DEPRECATED
+#endif
+
+#if (defined __GNUC__)
+#define EIGEN_UNUSED __attribute__((unused))
+#else
+#define EIGEN_UNUSED
+#endif
+
+#if (defined __GNUC__) /* EIGEN_ASM_COMMENT(X) emits the string literal X, prefixed with '#', into the generated assembly */
+#define EIGEN_ASM_COMMENT(X)  asm("#" X) /* the space matters: C++11 lexes "#"X as one literal with a ud-suffix */
+#else
+#define EIGEN_ASM_COMMENT(X)
+#endif
+
+/* EIGEN_ALIGN_128 forces data to be 16-byte aligned, EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
+ * so that vectorization doesn't affect binary compatibility.
+ *
+ * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
+ * vectorized and non-vectorized code.
+ */
+#if !EIGEN_ALIGN
 #define EIGEN_ALIGN_128
+#elif (defined __GNUC__)
+#define EIGEN_ALIGN_128 __attribute__((aligned(16)))
+#elif (defined _MSC_VER)
+#define EIGEN_ALIGN_128 __declspec(align(16))
+#else
+#error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler
 #endif

 #define EIGEN_RESTRICT __restrict

+#ifndef EIGEN_STACK_ALLOCATION_LIMIT
+#define EIGEN_STACK_ALLOCATION_LIMIT 1000000
+#endif
+
+#ifndef EIGEN_DEFAULT_IO_FORMAT
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
+#endif
+
+// just an empty macro !
+#define EIGEN_EMPTY
+
+// concatenate two tokens
+#define EIGEN_CAT2(a,b) a ## b
+#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
+
+// convert a token to a string
+#define EIGEN_MAKESTRING2(a) #a
+#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
+
+// format used in Eigen's documentation
+// needed to define it here as escaping characters in CMake add_definition's argument seems very problematic.
+#define EIGEN_DOCS_IO_FORMAT IOFormat(3, AlignCols, " ", "\n", "", "")
+
 #define EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \
 template<typename OtherDerived> \
-Derived& operator Op(const MatrixBase<OtherDerived>& other) \
+EIGEN_STRONG_INLINE Derived& operator Op(const Eigen::MatrixBase<OtherDerived>& other) \
 { \
-  return Eigen::MatrixBase<Derived>::operator Op(other.derived()); \
+  return Base::operator Op(other.derived()); \
 } \
-Derived& operator Op(const Derived& other) \
+EIGEN_STRONG_INLINE Derived& operator Op(const Derived& other) \
 { \
-  return Eigen::MatrixBase<Derived>::operator Op(other); \
+  return Base::operator Op(other); \
 }

 #define EIGEN_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, Op) \
 template<typename Other> \
-Derived& operator Op(const Other& scalar) \
+EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \
 { \
-  return Eigen::MatrixBase<Derived>::operator Op(scalar); \
+  return Base::operator Op(scalar); \
 }

 #define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
@@ -135,8 +253,8 @@ typedef BaseClass Base; \
 typedef typename Eigen::ei_traits<Derived>::Scalar Scalar; \
 typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; \
 typedef typename Base::PacketScalar PacketScalar; \
+typedef typename Base::CoeffReturnType CoeffReturnType; \
 typedef typename Eigen::ei_nested<Derived>::type Nested; \
-typedef typename Eigen::ei_eval<Derived>::type Eval; \
 enum { RowsAtCompileTime = Eigen::ei_traits<Derived>::RowsAtCompileTime, \
       ColsAtCompileTime = Eigen::ei_traits<Derived>::ColsAtCompileTime, \
       MaxRowsAtCompileTime = Eigen::ei_traits<Derived>::MaxRowsAtCompileTime, \
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -2,7 +2,8 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,52 +27,167 @@
 #ifndef EIGEN_MEMORY_H
 #define EIGEN_MEMORY_H

-#ifdef EIGEN_VECTORIZE
-// it seems we cannot assume posix_memalign is defined in the stdlib header
-extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#if defined(__APPLE__) || defined(_WIN64)
+  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
+#else
+  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
 #endif

-/** \internal
-  * Static array automatically aligned if the total byte size is a multiple of 16
+#if ((defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+  #define EIGEN_HAS_POSIX_MEMALIGN 1
+#else
+  #define EIGEN_HAS_POSIX_MEMALIGN 0
+#endif
+
+#ifdef EIGEN_VECTORIZE_SSE
+  #define EIGEN_HAS_MM_MALLOC 1
+#else
+  #define EIGEN_HAS_MM_MALLOC 0
+#endif
+
+/** \internal like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
+  * Fast, but wastes 16 additional bytes of memory. Does not throw; returns null if malloc fails.
  */
-template <typename T, int Size, bool Align> struct ei_aligned_array
+inline void* ei_handmade_aligned_malloc(size_t size)
 {
-  EIGEN_ALIGN_128 T array[Size];
-};
-
-template <typename T, int Size> struct ei_aligned_array<T,Size,false>
-{
-  T array[Size];
-};
-
-/** \internal allocates \a size * sizeof(\a T) bytes with a 16 bytes based alignment */
-template<typename T>
-inline T* ei_aligned_malloc(size_t size)
-{
-  #ifdef EIGEN_VECTORIZE
-  if (ei_packet_traits<T>::size>1)
-  {
-    void* ptr;
-    if (posix_memalign(&ptr, 16, size*sizeof(T))==0)
-      return static_cast<T*>(ptr);
-    else
-      return 0;
-  }
-  else
-  #endif
-    return new T[size];
+  void *original = malloc(size+16);
+  if(original == 0) return 0; // allocation failed: there is no block to align and no slot to store the original pointer in
+  void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+  *(reinterpret_cast<void**>(aligned) - 1) = original; // stash the malloc'ed address just below the aligned block for the matching free
+  return aligned;
 }

-/** \internal free memory allocated with ei_aligned_malloc */
-template<typename T>
-inline void ei_aligned_free(T* ptr)
+/** \internal frees memory allocated with ei_handmade_aligned_malloc */
+inline void ei_handmade_aligned_free(void *ptr)
 {
-  #ifdef EIGEN_VECTORIZE
-  if (ei_packet_traits<T>::size>1)
-    free(ptr);
-  else
+  if(ptr)
+    free(*(reinterpret_cast<void**>(ptr) - 1));
+}
+
+/** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
+  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
+  */
+inline void* ei_aligned_malloc(size_t size)
+{
+  #ifdef EIGEN_NO_MALLOC
+    ei_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
  #endif
-    delete[] ptr;
+
+  void *result;  
+  #if !EIGEN_ALIGN
+    result = malloc(size);
+  #elif EIGEN_MALLOC_ALREADY_ALIGNED
+    result = malloc(size);
+  #elif EIGEN_HAS_POSIX_MEMALIGN
+    if(posix_memalign(&result, 16, size)) result = 0;
+  #elif EIGEN_HAS_MM_MALLOC
+    result = _mm_malloc(size, 16);
+  #elif (defined _MSC_VER)
+    result = _aligned_malloc(size, 16);
+  #else
+    result = ei_handmade_aligned_malloc(size);
+  #endif
+    
+  #ifdef EIGEN_EXCEPTIONS
+    if(result == 0)
+      throw std::bad_alloc();
+  #endif
+  return result;
+}
+
+/** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
+  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
+  */
+template<bool Align> inline void* ei_conditional_aligned_malloc(size_t size)
+{
+  return ei_aligned_malloc(size);
+}
+
+template<> inline void* ei_conditional_aligned_malloc<false>(size_t size)
+{
+  #ifdef EIGEN_NO_MALLOC
+    ei_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
+  #endif
+
+  void *result = malloc(size);
+  #ifdef EIGEN_EXCEPTIONS
+    if(!result) throw std::bad_alloc();
+  #endif
+  return result;
+}
+
+/** allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
+  * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
+  * The default constructor of T is called.
+  */
+template<typename T> inline T* ei_aligned_new(size_t size)
+{
+  void *void_result = ei_aligned_malloc(sizeof(T)*size);
+  return ::new(void_result) T[size];
+}
+
+template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t size)
+{
+  void *void_result = ei_conditional_aligned_malloc<Align>(sizeof(T)*size);
+  return ::new(void_result) T[size];
+}
+
+/** \internal free memory allocated with ei_aligned_malloc
+  */
+inline void ei_aligned_free(void *ptr)
+{
+  #if !EIGEN_ALIGN
+    free(ptr);
+  #elif EIGEN_MALLOC_ALREADY_ALIGNED
+    free(ptr);
+  #elif EIGEN_HAS_POSIX_MEMALIGN
+    free(ptr);
+  #elif EIGEN_HAS_MM_MALLOC
+    _mm_free(ptr);
+  #elif defined(_MSC_VER)
+    _aligned_free(ptr);
+  #else
+    ei_handmade_aligned_free(ptr);
+  #endif
+}
+
+/** \internal free memory allocated with ei_conditional_aligned_malloc
+  */
+template<bool Align> inline void ei_conditional_aligned_free(void *ptr)
+{
+  ei_aligned_free(ptr);
+}
+
+template<> inline void ei_conditional_aligned_free<false>(void *ptr)
+{
+  free(ptr);
+}
+
+/** \internal delete the elements of an array.
+  * The \a size parameter tells on how many objects to call the destructor of T.
+  */
+template<typename T> inline void ei_delete_elements_of_array(T *ptr, size_t size)
+{
+  // always destruct an array starting from the end.
+  while(size) ptr[--size].~T();
+}
+
+/** \internal delete objects constructed with ei_aligned_new
+  * The \a size parameter tells on how many objects to call the destructor of T.
+  */
+template<typename T> inline void ei_aligned_delete(T *ptr, size_t size)
+{
+  ei_delete_elements_of_array<T>(ptr, size);
+  ei_aligned_free(ptr);
+}
+
+/** \internal delete objects constructed with ei_conditional_aligned_new
+  * The \a size parameter tells on how many objects to call the destructor of T.
+  */
+template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *ptr, size_t size)
+{
+  ei_delete_elements_of_array<T>(ptr, size);
+  ei_conditional_aligned_free<Align>(ptr);
 }

 /** \internal \returns the number of elements which have to be skipped such that data are 16 bytes aligned */
@@ -83,151 +199,170 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
  const int PacketAlignedMask = PacketSize-1;
  const bool Vectorized = PacketSize>1;
  return Vectorized
-          ? std::min<int>( (PacketSize - ((size_t(ptr)/sizeof(Scalar)) & PacketAlignedMask))
+          ? std::min<int>( (PacketSize - (int((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask))
                           & PacketAlignedMask, maxOffset)
          : 0;
 }

 /** \internal
-  * ei_alloc_stack(TYPE,SIZE) allocates sizeof(TYPE)*SIZE bytes on the stack if sizeof(TYPE)*SIZE is
-  * smaller than EIGEN_STACK_ALLOCATION_LIMIT. Otherwise the memory is allocated using the operator new.
-  * Data allocated with ei_alloc_stack \b must be freed calling ei_free_stack(PTR,TYPE,SIZE).
+  * ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes
+  * on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
+  * Otherwise the memory is allocated on the heap.
+  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE).
  * \code
-  * float * data = ei_alloc_stack(float,array.size());
+  * float * data = static_cast<float*>(ei_aligned_stack_alloc(sizeof(float)*array.size()));
  * // ...
-  * ei_free_stack(data,float,array.size());
+  * ei_aligned_stack_free(data,sizeof(float)*array.size());
  * \endcode
  */
 #ifdef __linux__
-# define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>16000000) ? new TYPE[SIZE] : (TYPE*)alloca(sizeof(TYPE)*(SIZE)))
-# define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>16000000) delete[] PTR
+  #define ei_aligned_stack_alloc(SIZE) (((SIZE)<=EIGEN_STACK_ALLOCATION_LIMIT) /* fully parenthesized so casts and operators bind to the result */ \
+                                    ? alloca(SIZE) \
+                                    : ei_aligned_malloc(SIZE))
+  #define ei_aligned_stack_free(PTR,SIZE) do { if((SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR); } while(0) /* only the heap branch needs freeing */
 #else
-# define ei_alloc_stack(TYPE,SIZE) new TYPE[SIZE]
-# define ei_free_stack(PTR,TYPE,SIZE) delete[] PTR
+  #define ei_aligned_stack_alloc(SIZE) ei_aligned_malloc(SIZE)
+  #define ei_aligned_stack_free(PTR,SIZE) ei_aligned_free(PTR)
 #endif

-/** \class WithAlignedOperatorNew
-  *
-  * \brief Enforces inherited classes to be 16 bytes aligned when dynamicalled allocated with operator new
-  *
-  * When Eigen's explicit vectorization is enabled, Eigen assumes that some fixed sizes types are aligned
-  * on a 16 bytes boundary. Such types include:
-  *  - Vector2d, Vector4f, Vector4i, Vector4d,
-  *  - Matrix2d, Matrix4f, Matrix4i, Matrix4d,
-  *  - etc.
-  * When objects are statically allocated, the compiler will automatically and always enforces 16 bytes
-  * alignment of the data. However some troubles might appear when data are dynamically allocated.
-  * Let's pick an example:
-  * \code
-  * struct Foo {
-  *   char dummy;
-  *   Vector4f some_vector;
-  * };
-  * Foo obj1;                           // static allocation
-  * obj1.some_vector = Vector4f(..);    // =>   OK
-  *
-  * Foo *pObj2 = new Foo;               // dynamic allocation
-  * pObj2->some_vector = Vector4f(..);  // =>  !! might segfault !!
-  * \endcode
-  * Here, the problem is that operator new is not aware of the compile time alignment requirement of the
-  * type Vector4f (and hence of the type Foo). Therefore "new Foo" does not necessarily returned a 16 bytes
-  * aligned pointer. The purpose of the class WithAlignedOperatorNew is exactly to overcome this issue, by
-  * overloading the operator new to return aligned data when the vectorization is enabled.
-  * Here is a similar safe example:
-  * \code
-  * struct Foo : WithAlignedOperatorNew {
-  *   char dummy;
-  *   Vector4f some_vector;
-  * };
-  * Foo obj1;                           // static allocation
-  * obj1.some_vector = Vector4f(..);    // =>   OK
-  *
-  * Foo *pObj2 = new Foo;               // dynamic allocation
-  * pObj2->some_vector = Vector4f(..);  // =>  SAFE !
-  * \endcode 
-  *
-  * \sa class ei_new_allocator
-  */
-struct WithAlignedOperatorNew
-{
-  #ifdef EIGEN_VECTORIZE
+#define ei_aligned_stack_new(TYPE,SIZE) ::new(ei_aligned_stack_alloc(sizeof(TYPE)*SIZE)) TYPE[SIZE]
+#define ei_aligned_stack_delete(TYPE,PTR,SIZE) do {ei_delete_elements_of_array<TYPE>(PTR, SIZE); \
+                                                   ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)

-  void *operator new(size_t size) throw()
-  {
-    void* ptr = 0;
-    if (posix_memalign(&ptr, 16, size)==0)
-      return ptr;
-    else
-      return 0;
-  }

-  void *operator new[](size_t size) throw()
-  {
-    void* ptr = 0;
-    if (posix_memalign(&ptr, 16, size)==0)
-      return ptr;
-    else
-      return 0;
-  }
-
-  void operator delete(void * ptr) { free(ptr); }
-  void operator delete[](void * ptr) { free(ptr); }
-  
+#if EIGEN_ALIGN
+  #ifdef EIGEN_EXCEPTIONS
+    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+      void* operator new(size_t size, const std::nothrow_t&) throw() { \
+        try { return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); } \
+        catch (...) { return 0; } \
+        return 0; \
+      }
+  #else
+    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+      void* operator new(size_t size, const std::nothrow_t&) throw() { \
+        return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
+      }
  #endif
-};

-template<typename T, int SizeAtCompileTime,
-         bool NeedsToAlign = (SizeAtCompileTime!=Dynamic) && ((sizeof(T)*SizeAtCompileTime)%16==0)>
-struct ei_with_aligned_operator_new : WithAlignedOperatorNew {};
+  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+      void *operator new(size_t size) { \
+        return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
+      } \
+      void *operator new[](size_t size) { \
+        return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
+      } \
+      void operator delete(void * ptr) throw() { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete[](void * ptr) throw() { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
+      /* in-place new and delete. since (at least afaik) there is no actual   */ \
+      /* memory allocated we can safely let the default implementation handle */ \
+      /* this particular case. */ \
+      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
+      void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
+      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
+      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+      void operator delete(void *ptr, const std::nothrow_t&) throw() { \
+        Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); \
+      } \
+      typedef void ei_operator_new_marker_type;
+#else
+  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#endif

-template<typename T, int SizeAtCompileTime>
-struct ei_with_aligned_operator_new<T,SizeAtCompileTime,false> {};
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))

-/** \class ei_new_allocator
-  *
-  * \brief stl compatible allocator to use with with fixed-size vector and matrix types
-  *
-  * STL allocator simply wrapping operators new[] and delete[]. Unlike GCC's default new_allocator,
-  * ei_new_allocator call operator new on the type \a T and not the general new operator ignoring
-  * overloaded version of operator new.
-  * 
-  * Example:
-  * \code
-  * // Vector4f requires 16 bytes alignment:
-  * std::vector<Vector4f,ei_new_allocator<Vector4f> > dataVec4;
-  * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
-  * std::vector<Vector3f> dataVec3;
-  * 
-  * struct Foo : WithAlignedOperatorNew {
-  *   char dummy;
-  *   Vector4f some_vector;
-  * };
-  * std::vector<Foo,ei_new_allocator<Foo> > dataFoo;
-  * \endcode
-  *
-  * \sa class WithAlignedOperatorNew
-  */
-template<typename T> class ei_new_allocator
+
+/** \class aligned_allocator
+*
+* \brief STL-compatible allocator to use with 16-byte-aligned types
+*
+* Example:
+* \code
+* // Matrix4f requires 16 bytes alignment:
+* std::map< int, Matrix4f, std::less<int>, aligned_allocator<Matrix4f> > my_map_mat4;
+* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
+* std::map< int, Vector3f > my_map_vec3;
+* \endcode
+*
+*/
+template<class T>
+class aligned_allocator
 {
-  public:
-    typedef T         value_type;
+public:
+    typedef size_t    size_type;
+    typedef ptrdiff_t difference_type;
    typedef T*        pointer;
    typedef const T*  const_pointer;
    typedef T&        reference;
    typedef const T&  const_reference;
+    typedef T         value_type;

-    template<typename OtherType>
+    template<class U>
    struct rebind
-    { typedef ei_new_allocator<OtherType> other; };
+    {
+        typedef aligned_allocator<U> other;
+    };

-    T* address(T& ref) const { return &ref; }
-    const T* address(const T& ref) const { return &ref; }
-    T* allocate(size_t size, const void* = 0) { return new T[size]; }
-    void deallocate(T* ptr, size_t) { delete[] ptr; }
-    size_t max_size() const { return size_t(-1) / sizeof(T); }
-    // FIXME I'm note sure about this construction...
-    void construct(T* ptr, const T& refObj) { ::new(ptr) T(refObj); }
-    void destroy(T* ptr) { ptr->~T(); }
+    pointer address( reference value ) const 
+    {
+        return &value;
+    }
+
+    const_pointer address( const_reference value ) const 
+    {
+        return &value;
+    }
+
+    aligned_allocator() throw() 
+    {
+    }
+
+    aligned_allocator( const aligned_allocator& ) throw() 
+    {
+    }
+
+    template<class U>
+    aligned_allocator( const aligned_allocator<U>& ) throw() 
+    {
+    }
+
+    ~aligned_allocator() throw() 
+    {
+    }
+
+    size_type max_size() const throw() // upper bound on the element count that allocate() can be asked for
+    {
+        return (std::numeric_limits<size_type>::max)() / sizeof(T); // in elements, not bytes; (max)() is immune to a max macro
+    }
+
+    pointer allocate( size_type num, const void* hint = 0 ) // returns storage for num objects of T obtained from ei_aligned_malloc; hint is ignored
+    {
+        static_cast<void>( hint ); // suppress unused variable warning
+        return static_cast<pointer>( ei_aligned_malloc( num * sizeof(T) ) );
+    }
+
+    void construct( pointer p, const T& value ) 
+    {
+        ::new( p ) T( value );
+    }
+
+    void destroy( pointer p ) 
+    {
+        p->~T();
+    }
+
+    void deallocate( pointer p, size_type /*num*/ ) 
+    {
+        ei_aligned_free( p );
+    }
+    
+    bool operator!=(const aligned_allocator<T>& other) const
+    { return false; }
+    
+    bool operator==(const aligned_allocator<T>& other) const
+    { return true; }
 };

 #endif // EIGEN_MEMORY_H
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -64,12 +64,20 @@ template<typename T> struct ei_cleantype<T&>        { typedef typename ei_cleant
 template<typename T> struct ei_cleantype<const T*>  { typedef typename ei_cleantype<T>::type type; };
 template<typename T> struct ei_cleantype<T*>        { typedef typename ei_cleantype<T>::type type; };

+/** \internal Allows to enable/disable an overload
+  * according to a compile time condition.
+  */
+template<bool Condition, typename T> struct ei_enable_if;
+
+template<typename T> struct ei_enable_if<true,T>
+{ typedef T type; };
+
 /** \internal
  * Convenient struct to get the result type of a unary or binary functor.
  *
  * It supports both the current STL mechanism (using the result_type member) as well as
  * upcoming next STL generation (using a templated result member).
-  * If none of these members is provided, then the type of the first argument is returned.
+  * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack.
  */
 template<typename T> struct ei_result_of {};

@@ -146,4 +154,38 @@ class ei_meta_sqrt
 template<int Y, int InfX, int SupX>
 class ei_meta_sqrt<Y, InfX, SupX, true> { public:  enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };

+/** \internal determines whether the product of two numeric types is allowed and what the return type is */
+template<typename T, typename U> struct ei_scalar_product_traits
+{
+  // dummy general case where T and U aren't compatible -- not allowed anyway but we catch it elsewhere
+  //enum { Cost = NumTraits<T>::MulCost };
+  typedef T ReturnType;
+};
+
+template<typename T> struct ei_scalar_product_traits<T,T>
+{
+  //enum { Cost = NumTraits<T>::MulCost };
+  typedef T ReturnType;
+};
+
+template<typename T> struct ei_scalar_product_traits<T,std::complex<T> >
+{
+  //enum { Cost = 2*NumTraits<T>::MulCost };
+  typedef std::complex<T> ReturnType;
+};
+
+template<typename T> struct ei_scalar_product_traits<std::complex<T>, T>
+{
+  //enum { Cost = 2*NumTraits<T>::MulCost  };
+  typedef std::complex<T> ReturnType;
+};
+
+// FIXME quick workaround around current limitation of ei_result_of
+template<typename Scalar, typename ArgType0, typename ArgType1>
+struct ei_result_of<ei_scalar_product_op<Scalar>(ArgType0,ArgType1)> {
+typedef typename ei_scalar_product_traits<typename ei_cleantype<ArgType0>::type, typename ei_cleantype<ArgType1>::type>::ReturnType type;
+};
+
+
+
 #endif // EIGEN_META_H
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -44,7 +44,7 @@
  #ifdef __GXX_EXPERIMENTAL_CXX0X__

    // if native static_assert is enabled, let's use it
-    #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG)
+    #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);

  #else // CXX0X

@@ -55,29 +55,50 @@
    struct ei_static_assert<true>
    {
      enum {
-        you_tried_calling_a_vector_method_on_a_matrix,
-        you_mixed_vectors_of_different_sizes,
-        you_mixed_matrices_of_different_sizes,
-        this_method_is_only_for_vectors_of_a_specific_size,
-        this_method_is_only_for_matrices_of_a_specific_size,
-        you_did_a_programming_error,
-        you_called_a_fixed_size_method_on_a_dynamic_size_matrix_or_vector,
-        unaligned_load_and_store_operations_unimplemented_on_AltiVec,
-        scalar_type_must_be_floating_point,
-        default_writting_to_selfadjoint_not_supported,
-        writting_to_triangular_part_with_unit_diag_is_not_supported,
-        this_method_is_only_for_fixed_size
+        YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
+        YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
+        YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
+        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
+        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
+        YOU_MADE_A_PROGRAMMING_MISTAKE,
+        YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
+        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
+        NUMERIC_TYPE_MUST_BE_FLOATING_POINT,
+        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
+        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
+        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
+        INVALID_MATRIX_PRODUCT,
+        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
+        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
+        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
+        THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
+        INVALID_MATRIX_TEMPLATE_PARAMETERS,
+        BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
+        THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX
      };
    };

-    #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
-      if (ei_static_assert<CONDITION ? true : false>::MSG) {}
+    // Specialized implementation for MSVC to avoid "conditional
+    // expression is constant" warnings.  This implementation doesn't
+    // appear to work under GCC, hence the multiple implementations.
+    #ifdef _MSC_VER

-  #endif // CXX0X
+      #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+        {Eigen::ei_static_assert<CONDITION ? true : false>::MSG;}
+
+    #else
+
+      #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+        if (Eigen::ei_static_assert<CONDITION ? true : false>::MSG) {}
+
+    #endif
+
+  #endif // not CXX0X

 #else // EIGEN_NO_STATIC_ASSERT

-  #define EIGEN_STATIC_ASSERT(CONDITION,MSG) ei_assert((CONDITION) && #MSG)
+  #define EIGEN_STATIC_ASSERT(CONDITION,MSG) ei_assert((CONDITION) && #MSG);

 #endif // EIGEN_NO_STATIC_ASSERT

@@ -85,22 +106,22 @@
 // static assertion failing if the type \a TYPE is not a vector type
 #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \
-                      you_tried_calling_a_vector_method_on_a_matrix)
+                      YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)

 // static assertion failing if the type \a TYPE is not fixed-size
 #define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \
  EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \
-                      you_called_a_fixed_size_method_on_a_dynamic_size_matrix_or_vector)
+                      YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)

 // static assertion failing if the type \a TYPE is not a vector type of the given size
 #define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \
  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \
-                      this_method_is_only_for_vectors_of_a_specific_size)
+                      THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)

 // static assertion failing if the type \a TYPE is not a vector type of the given size
 #define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \
  EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \
-                      this_method_is_only_for_matrices_of_a_specific_size)
+                      THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)

 // static assertion failing if the two vector expression types are not compatible (same fixed-size or dynamic size)
 #define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \
@@ -108,17 +129,20 @@
      (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \
    || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \
    || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\
-    you_mixed_vectors_of_different_sizes)
+    YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)

-// static assertion failing if the two matrix expression types are not compatible (same fixed-size or dynamic size)
-#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
-  EIGEN_STATIC_ASSERT( \
-     ((int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
+#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+      ((int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
    || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
    || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \
   && (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \
    || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \
-    || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))),\
-    you_mixed_matrices_of_different_sizes)
+    || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime)))
+
+// static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes
+#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+  EIGEN_STATIC_ASSERT( \
+     EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\
+    YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)

 #endif // EIGEN_STATIC_ASSERT_H
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -41,6 +41,10 @@ class ei_no_assignment_operator
    ei_no_assignment_operator& operator=(const ei_no_assignment_operator&);
 };

+/** \internal If the template parameter Value is Dynamic, this class is just a wrapper around an int variable that
+  * can be accessed using value() and setValue().
+  * Otherwise, this class is an empty structure and value() just returns the template parameter Value.
+  */
 template<int Value> class ei_int_if_dynamic EIGEN_EMPTY_STRUCT
 {
  public:
@@ -69,11 +73,7 @@ template<typename T> struct ei_functor_traits
  };
 };

-template<typename T> struct ei_packet_traits
-{
-  typedef T type;
-  enum {size=1};
-};
+template<typename T> struct ei_packet_traits;

 template<typename T> struct ei_unpacket_traits
 {
@@ -81,22 +81,16 @@ template<typename T> struct ei_unpacket_traits
  enum {size=1};
 };

-
-template<typename Scalar, int Rows, int Cols, int StorageOrder, int MaxRows, int MaxCols>
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
 class ei_compute_matrix_flags
 {
    enum {
-      row_major_bit = (Rows != 1 && Cols != 1)  // if this is not a vector,
-                                                // then the storage order really matters,
-                                                // so let us strictly honor the user's choice.
-                    ? StorageOrder
-                    : Cols > 1 ? RowMajorBit : 0,
+      row_major_bit = Options&RowMajor ? RowMajorBit : 0,
      inner_max_size = row_major_bit ? MaxCols : MaxRows,
      is_big = inner_max_size == Dynamic,
-      is_packet_size_multiple = (Cols * Rows)%ei_packet_traits<Scalar>::size==0,
-      packet_access_bit = ei_packet_traits<Scalar>::size > 1
-                          && (is_big || is_packet_size_multiple) ? PacketAccessBit : 0,
-      aligned_bit = packet_access_bit && (is_big || is_packet_size_multiple) ? AlignedBit : 0
+      is_packet_size_multiple = (Cols*Rows) % ei_packet_traits<Scalar>::size == 0,
+      aligned_bit = (((Options&DontAlign)==0) && (is_big || is_packet_size_multiple)) ? AlignedBit : 0,
+      packet_access_bit = ei_packet_traits<Scalar>::size > 1 && aligned_bit ? PacketAccessBit : 0
    };

  public:
@@ -108,6 +102,10 @@ template<int _Rows, int _Cols> struct ei_size_at_compile_time
  enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
 };

+/* ei_eval : the return type of eval(). For matrices, this is just a const reference
+ * in order to avoid a useless copy
+ */
+
 template<typename T, int Sparseness = ei_traits<T>::Flags&SparseBit> class ei_eval;

 template<typename T> struct ei_eval<T,IsDense>
@@ -115,7 +113,41 @@ template<typename T> struct ei_eval<T,IsDense>
  typedef Matrix<typename ei_traits<T>::Scalar,
                ei_traits<T>::RowsAtCompileTime,
                ei_traits<T>::ColsAtCompileTime,
-                ei_traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor,
+                AutoAlign | (ei_traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                ei_traits<T>::MaxRowsAtCompileTime,
+                ei_traits<T>::MaxColsAtCompileTime
+          > type;
+};
+
+// for matrices, no need to evaluate: 'type' is a const reference, which avoids a useless copy
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct ei_eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, IsDense>
+{
+  typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+
+/* ei_plain_matrix_type : the difference from ei_eval is that ei_plain_matrix_type is always a plain matrix type,
+ * whereas ei_eval is a const reference in the case of a matrix
+ */
+template<typename T> struct ei_plain_matrix_type
+{
+  typedef Matrix<typename ei_traits<T>::Scalar,
+                ei_traits<T>::RowsAtCompileTime,
+                ei_traits<T>::ColsAtCompileTime,
+                AutoAlign | (ei_traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                ei_traits<T>::MaxRowsAtCompileTime,
+                ei_traits<T>::MaxColsAtCompileTime
+          > type;
+};
+
+/* ei_plain_matrix_type_column_major : same as ei_plain_matrix_type but guaranteed to be column-major
+ */
+template<typename T> struct ei_plain_matrix_type_column_major
+{
+  typedef Matrix<typename ei_traits<T>::Scalar,
+                ei_traits<T>::RowsAtCompileTime,
+                ei_traits<T>::ColsAtCompileTime,
+                AutoAlign | ColMajor,
                ei_traits<T>::MaxRowsAtCompileTime,
                ei_traits<T>::MaxColsAtCompileTime
          > type;
@@ -124,7 +156,25 @@ template<typename T> struct ei_eval<T,IsDense>
 template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
 template<typename T> struct ei_must_nest_by_value<NestByValue<T> > { enum { ret = true }; };

-template<typename T, int n=1, typename EvalType = typename ei_eval<T>::type> struct ei_nested
+/** \internal Determines how a given expression should be nested into another one.
+  * For example, when you do a * (b+c), Eigen will determine how the expression b+c should be
+  * nested into the bigger product expression. The choice is between nesting the expression b+c as-is, or
+  * evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is
+  * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
+  * many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
+  *
+  * \param T the type of the expression being nested
+  * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
+  *
+  * Example. Suppose that a, b, and c are of type Matrix3d. The user forms the expression a*(b+c).
+  * b+c is an expression "sum of matrices", which we will denote by S. In order to determine how to nest it,
+  * the Product expression uses: ei_nested<S, 3>::ret, which turns out to be Matrix3d because the internal logic of
+  * ei_nested determined that in this case it was better to evaluate the expression b+c into a temporary. On the other hand,
+  * since a is of type Matrix3d, the Product expression nests it as ei_nested<Matrix3d, 3>::ret, which turns out to be
+  * const Matrix3d&, because the internal logic of ei_nested determined that since a was already a matrix, there was no point
+  * in copying it into another matrix.
+  */
+template<typename T, int n=1, typename PlainMatrixType = typename ei_eval<T>::type> struct ei_nested
 {
  enum {
    CostEval   = (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost),
@@ -136,7 +186,7 @@ template<typename T, int n=1, typename EvalType = typename ei_eval<T>::type> str
    typename ei_meta_if<
      (int(ei_traits<T>::Flags) & EvalBeforeNestingBit)
      || ( int(CostEval) <= int(CostNoEval) ),
-      EvalType,
+      PlainMatrixType,
      const T&
    >::ret
  >::ret type;
@@ -148,6 +198,28 @@ template<unsigned int Flags> struct ei_are_flags_consistent
  };
 };

+/** \internal Helper base class that adds scalar-multiple operator*
+  * overloads for a second scalar type (e.g. complex types) */
+template<typename Derived,typename Scalar,typename OtherScalar,
+         bool EnableIt = !ei_is_same_type<Scalar,OtherScalar>::ret >
+struct ei_special_scalar_op_base
+{
+  // dummy operator* so that the 
+  // "using ei_special_scalar_op_base::operator*" compiles
+  void operator*() const;
+};
+
+template<typename Derived,typename Scalar,typename OtherScalar>
+struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true>
+{
+  const CwiseUnaryOp<ei_scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+  operator*(const OtherScalar& scalar) const
+  {
+    return CwiseUnaryOp<ei_scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+      (*static_cast<const Derived*>(this), ei_scalar_multiple2_op<Scalar,OtherScalar>(scalar));
+  }
+};
+
 /** \internal Gives the type of a sub-matrix or sub-vector of a matrix of type \a ExpressionType and size \a Size
  * TODO: could be a good idea to define a big ReturnType struct ??
  */
@@ -157,4 +229,26 @@ template<typename ExpressionType, int RowsOrSize=Dynamic, int Cols=Dynamic> stru
  typedef Block<ExpressionType, RowsOrSize, Cols> Type;
 };

+template<typename ExpressionType> struct HNormalizedReturnType {
+
+  enum {
+    SizeAtCompileTime = ExpressionType::SizeAtCompileTime,
+    SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
+  };
+  typedef Block<ExpressionType,
+                ei_traits<ExpressionType>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
+                ei_traits<ExpressionType>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> StartMinusOne;
+  typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<ExpressionType>::Scalar>, 
+              NestByValue<StartMinusOne> > Type;
+};
+
+template<typename XprType, typename CastType> struct ei_cast_return_type
+{
+  typedef typename XprType::Scalar CurrentScalarType;
+  typedef typename ei_cleantype<CastType>::type _CastType;
+  typedef typename _CastType::Scalar NewScalarType;
+  typedef typename ei_meta_if<ei_is_same_type<CurrentScalarType,NewScalarType>::ret,
+                              const XprType&,CastType>::ret type;
+};
+
 #endif // EIGEN_XPRHELPER_H
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -0,0 +1,212 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_ALIGNEDBOX_H
+#define EIGEN_ALIGNEDBOX_H
+
+/** \geometry_module \ingroup Geometry_Module
+  * \nonstableyet
+  *
+  * \class AlignedBox
+  *
+  * \brief An axis aligned box
+  *
+  * \param _Scalar the type of the scalar coefficients
+  * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+  *
+  * This class represents an axis aligned box as a pair of the minimal and maximal corners.
+  */
+template <typename _Scalar, int _AmbientDim>
+class AlignedBox
+{
+public:
+EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
+  enum { AmbientDimAtCompileTime = _AmbientDim };
+  typedef _Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
+
+  /** Default constructor initializing a null box; setNull() is skipped when the ambient dimension is Dynamic. */
+  inline explicit AlignedBox()
+  { if (AmbientDimAtCompileTime!=Dynamic) setNull(); }
+
+  /** Constructs a null box with \a _dim the dimension of the ambient space. */
+  inline explicit AlignedBox(int _dim) : m_min(_dim), m_max(_dim)
+  { setNull(); }
+
+  /** Constructs a box with extremities \a _min and \a _max. */
+  inline AlignedBox(const VectorType& _min, const VectorType& _max) : m_min(_min), m_max(_max) {}
+
+  /** Constructs a box containing a single point \a p. */
+  inline explicit AlignedBox(const VectorType& p) : m_min(p), m_max(p) {}
+
+  ~AlignedBox() {}
+
+  /** \returns the dimension of the ambient space, i.e. the number of coefficients of each corner vector */
+  inline int dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : int(AmbientDimAtCompileTime); }
+
+  /** \returns true if the box is null, i.e., empty. */
+  inline bool isNull() const { return (m_min.cwise() > m_max).any(); }
+
+  /** Makes \c *this a null/empty box. */
+  inline void setNull()
+  {
+    m_min.setConstant( std::numeric_limits<Scalar>::max());
+    m_max.setConstant(-std::numeric_limits<Scalar>::max());
+  }
+
+  /** \returns the minimal corner */
+  inline const VectorType& min() const { return m_min; }
+  /** \returns a non const reference to the minimal corner */
+  inline VectorType& min() { return m_min; }
+  /** \returns the maximal corner */
+  inline const VectorType& max() const { return m_max; }
+  /** \returns a non const reference to the maximal corner */
+  inline VectorType& max() { return m_max; }
+
+  /** \returns the center of the box */
+  inline VectorType center() const { return (m_min + m_max) / 2; }
+
+  /** \returns true if the point \a p is inside the box \c *this. */
+  inline bool contains(const VectorType& p) const
+  { return (m_min.cwise()<=p).all() && (p.cwise()<=m_max).all(); }
+
+  /** \returns true if the box \a b is entirely inside the box \c *this. */
+  inline bool contains(const AlignedBox& b) const
+  { return (m_min.cwise()<=b.min()).all() && (b.max().cwise()<=m_max).all(); }
+
+  /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. */
+  inline AlignedBox& extend(const VectorType& p)
+  { m_min = m_min.cwise().min(p); m_max = m_max.cwise().max(p); return *this; }
+
+  /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. */
+  inline AlignedBox& extend(const AlignedBox& b)
+  { m_min = m_min.cwise().min(b.m_min); m_max = m_max.cwise().max(b.m_max); return *this; }
+
+  /** Clamps \c *this by the box \a b and returns a reference to \c *this. */
+  inline AlignedBox& clamp(const AlignedBox& b)
+  { m_min = m_min.cwise().max(b.m_min); m_max = m_max.cwise().min(b.m_max); return *this; }
+
+  /** Returns an AlignedBox that is the intersection of \a b and \c *this */
+  inline AlignedBox intersection(const AlignedBox &b) const
+  { return AlignedBox(m_min.cwise().max(b.m_min), m_max.cwise().min(b.m_max)); }
+
+  /** Returns an AlignedBox that is the union of \a b and \c *this */
+  inline AlignedBox merged(const AlignedBox &b) const
+  { return AlignedBox(m_min.cwise().min(b.m_min), m_max.cwise().max(b.m_max)); }
+
+  /** Translates \c *this by the vector \a t and returns a reference to \c *this. */
+  inline AlignedBox& translate(const VectorType& t)
+  { m_min += t; m_max += t; return *this; }
+
+  /** \returns the squared distance between the point \a p and the box \c *this,
+    * and zero if \a p is inside the box.
+    * \sa exteriorDistance()
+    */
+  inline Scalar squaredExteriorDistance(const VectorType& p) const;
+
+  /** \returns the squared distance between the boxes \a b and \c *this,
+    * and zero if the boxes intersect.
+    * \sa exteriorDistance()
+    */
+  inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
+
+  /** \returns the distance between the point \a p and the box \c *this,
+    * and zero if \a p is inside the box.
+    * \sa squaredExteriorDistance()
+    */
+  inline Scalar exteriorDistance(const VectorType& p) const
+  { return ei_sqrt(squaredExteriorDistance(p)); }
+
+  /** \returns the distance between the boxes \a b and \c *this,
+    * and zero if the boxes intersect.
+    * \sa squaredExteriorDistance()
+    */
+  inline Scalar exteriorDistance(const AlignedBox& b) const
+  { return ei_sqrt(squaredExteriorDistance(b)); }
+
+  /** \returns \c *this with scalar type cast to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<AlignedBox,
+           AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
+  {
+    return typename ei_cast_return_type<AlignedBox,
+                    AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type(*this);
+  }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
+  {
+    m_min = other.min().template cast<Scalar>();
+    m_max = other.max().template cast<Scalar>();
+  }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const AlignedBox& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
+
+protected:
+
+  VectorType m_min, m_max;
+};
+
+template<typename Scalar,int AmbientDim>
+inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const VectorType& p) const
+{
+  Scalar sum = 0.;  // squared per-axis overshoot of p below m_min or above m_max
+  Scalar gap;
+  for (int axis=0; axis<dim(); ++axis)
+  {
+    if ((gap = (p[axis]-m_min[axis]))<0.)
+      sum += gap*gap;
+    else if ( (gap = (m_max[axis]-p[axis]))<0. )
+      sum += gap*gap;
+  }
+  return sum;
+}
+
+template<typename Scalar,int AmbientDim>
+inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
+{
+  Scalar sum = 0.;  // squared per-axis separation between the two boxes
+  Scalar gap;
+  for (int axis=0; axis<dim(); ++axis)
+  {
+    if ((gap = (b.m_min[axis]-m_max[axis]))>0.)
+      sum += gap*gap;
+    else if ( (gap = (m_min[axis]-b.m_max[axis]))>0. )
+      sum += gap*gap;
+  }
+  return sum;
+}
+
+#endif // EIGEN_ALIGNEDBOX_H
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -25,7 +25,7 @@
 #ifndef EIGEN_ANGLEAXIS_H
 #define EIGEN_ANGLEAXIS_H

-/** \geometry_module \ingroup GeometryModule
+/** \geometry_module \ingroup Geometry_Module
  *
  * \class AngleAxis
  *
@@ -33,6 +33,8 @@
  *
  * \param _Scalar the scalar type, i.e., the type of the coefficients.
  *
+  * \warning When setting up an AngleAxis object, the axis vector \b must \b be \b normalized.
+  *
  * The following two typedefs are provided for convenience:
  * \li \c AngleAxisf for \c float
  * \li \c AngleAxisd for \c double
@@ -47,7 +49,7 @@
  * \note This class is not aimed to be used to store a rotation transformation,
  * but rather to make easier the creation of other rotation (Quaternion, rotation Matrix)
  * and transformation objects.
-  * 
+  *
  * \sa class Quaternion, class Transform, MatrixBase::UnitX()
  */

@@ -64,7 +66,7 @@ class AngleAxis : public RotationBase<AngleAxis<_Scalar>,3>
 public:

  using Base::operator*;
-  
+
  enum { Dim = 3 };
  /** the scalar type of the coefficients */
  typedef _Scalar Scalar;
@@ -82,7 +84,10 @@ public:
  /** Default constructor without initialization. */
  AngleAxis() {}
  /** Constructs and initialize the angle-axis rotation from an \a angle in radian
-    * and an \a axis which must be normalized. */
+    * and an \a axis which \b must \b be \b normalized.
+    * 
+    * \warning If the \a axis vector is not normalized, then the angle-axis object
+    *          represents an invalid rotation. */
  template<typename Derived>
  inline AngleAxis(Scalar angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}
  /** Constructs and initialize the angle-axis rotation from a quaternion \a q. */
@@ -109,18 +114,6 @@ public:
  friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
  { return a * QuaternionType(b); }

-  /** Concatenates two rotations */
-  inline Matrix3 operator* (const Matrix3& other) const
-  { return toRotationMatrix() * other; }
-
-  /** Concatenates two rotations */
-  inline friend Matrix3 operator* (const Matrix3& a, const AngleAxis& b)
-  { return a * b.toRotationMatrix(); }
-
-  /** Applies rotation to vector */
-  inline Vector3 operator* (const Vector3& other) const
-  { return toRotationMatrix() * other; }
-
  /** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */
  AngleAxis inverse() const
  { return AngleAxis(-m_angle, m_axis); }
@@ -132,12 +125,36 @@ public:
  template<typename Derived>
  AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
  Matrix3 toRotationMatrix(void) const;
+
+  /** \returns \c *this with scalar type cast to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
+  { return typename ei_cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type(*this); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
+  {
+    m_axis = other.axis().template cast<Scalar>();
+    m_angle = Scalar(other.angle());
+  }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const AngleAxis& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return m_axis.isApprox(other.m_axis, prec) && ei_isApprox(m_angle,other.m_angle, prec); }
 };

-/** \ingroup GeometryModule
+/** \ingroup Geometry_Module
  * single precision angle-axis type */
 typedef AngleAxis<float> AngleAxisf;
-/** \ingroup GeometryModule
+/** \ingroup Geometry_Module
  * double precision angle-axis type */
 typedef AngleAxis<double> AngleAxisd;

@@ -147,7 +164,7 @@ typedef AngleAxis<double> AngleAxisd;
 template<typename Scalar>
 AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionType& q)
 {
-  Scalar n2 = q.vec().norm2();
+  Scalar n2 = q.vec().squaredNorm();
  if (n2 < precision<Scalar>()*precision<Scalar>())
  {
    m_angle = 0;
--- a/Eigen/src/Geometry/CMakeLists.txt
+++ b/Eigen/src/Geometry/CMakeLists.txt
@@ -2,5 +2,7 @@ FILE(GLOB Eigen_Geometry_SRCS "*.h")

 INSTALL(FILES
  ${Eigen_Geometry_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry COMPONENT Devel
  )
+
+ADD_SUBDIRECTORY(arch)
--- a/Eigen/src/Geometry/EulerAngles.h
+++ b/Eigen/src/Geometry/EulerAngles.h
@@ -0,0 +1,96 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_EULERANGLES_H
+#define EIGEN_EULERANGLES_H
+
+/** \geometry_module \ingroup Geometry_Module
+  * \nonstableyet
+  *
+  * \returns the Euler-angles of the rotation matrix \c *this using the convention defined by the triplet (\a a0,\a a1,\a a2)
+  *
+  * Each of the three parameters \a a0,\a a1,\a a2 represents the respective rotation axis as an integer in {0,1,2}.
+  * For instance, in:
+  * \code Vector3f ea = mat.eulerAngles(2, 0, 2); \endcode
+  * "2" represents the z axis and "0" the x axis, etc. The returned angles are such that
+  * we have the following equality:
+  * \code
+  * mat == AngleAxisf(ea[0], Vector3f::UnitZ())
+  *      * AngleAxisf(ea[1], Vector3f::UnitX())
+  *      * AngleAxisf(ea[2], Vector3f::UnitZ()); \endcode
+  * This corresponds to the right-multiply conventions (with right hand side frames).
+  */
+template<typename Derived>
+inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
+MatrixBase<Derived>::eulerAngles(int a0, int a1, int a2) const
+{
+  /* Implemented from Graphics Gems IV */
+  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3)
+
+  Matrix<Scalar,3,1> res;
+  typedef Matrix<typename Derived::Scalar,2,1> Vector2;
+  const Scalar epsilon = precision<Scalar>();
+
+  const int odd = ((a0+1)%3 == a1) ? 0 : 1;
+  const int i = a0;
+  const int j = (a0 + 1 + odd)%3;
+  const int k = (a0 + 2 - odd)%3;
+
+  if (a0==a2)
+  {
+    Scalar s = Vector2(coeff(j,i) , coeff(k,i)).norm();
+    res[1] = std::atan2(s, coeff(i,i));
+    if (s > epsilon)
+    {
+      res[0] = std::atan2(coeff(j,i), coeff(k,i));
+      res[2] = std::atan2(coeff(i,j),-coeff(i,k));
+    }
+    else
+    {
+      res[0] = Scalar(0);
+      res[2] = (coeff(i,i)>0?1:-1)*std::atan2(-coeff(k,j), coeff(j,j));
+    }
+  }
+  else
+  {
+    Scalar c = Vector2(coeff(i,i) , coeff(i,j)).norm();
+    res[1] = std::atan2(-coeff(i,k), c);
+    if (c > epsilon)
+    {
+      res[0] = std::atan2(coeff(j,k), coeff(k,k));
+      res[2] = std::atan2(coeff(i,j), coeff(i,i));
+    }
+    else
+    {
+      res[0] = Scalar(0);
+      res[2] = (coeff(i,k)>0?1:-1)*std::atan2(-coeff(k,j), coeff(j,j));
+    }
+  }
+  if (!odd)
+    res = -res;
+  return res;
+}
+
+
+#endif // EIGEN_EULERANGLES_H
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -0,0 +1,260 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_HOMOGENEOUS_H
+#define EIGEN_HOMOGENEOUS_H
+
+/** \geometry_module \ingroup Geometry_Module
+  * \nonstableyet
+  * \class Homogeneous
+  *
+  * \brief Expression of one (or a set of) homogeneous vector(s)
+  *
+  * \param MatrixType the type of the object that is being made homogeneous
+  *
+  * This class represents an expression of one (or a set of) homogeneous vector(s).
+  * It is the return type of MatrixBase::homogeneous() and most of the time
+  * this is the only way it is used.
+  *
+  * \sa MatrixBase::homogeneous()
+  */
+template<typename MatrixType,int Direction>
+struct ei_traits<Homogeneous<MatrixType,Direction> >
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ?
+                  int(MatrixType::RowsAtCompileTime) + 1 : Dynamic,
+    ColsPlusOne = (MatrixType::ColsAtCompileTime != Dynamic) ?
+                  int(MatrixType::ColsAtCompileTime) + 1 : Dynamic,
+    RowsAtCompileTime = Direction==Vertical  ?  RowsPlusOne : MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = Direction==Horizontal ? ColsPlusOne : MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    Flags = _MatrixTypeNested::Flags & HereditaryBits,
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+  };
+};
+
+template<typename MatrixType,typename Lhs> struct ei_homogeneous_left_product_impl;
+template<typename MatrixType,typename Rhs> struct ei_homogeneous_right_product_impl;
+
+template<typename MatrixType,int Direction> class Homogeneous
+  : public MatrixBase<Homogeneous<MatrixType,Direction> >
+{
+  public:
+
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Homogeneous)
+
+    inline Homogeneous(const MatrixType& matrix)
+      : m_matrix(matrix)
+    {}
+
+    inline int rows() const { return m_matrix.rows() + (Direction==Vertical   ? 1 : 0); }
+    inline int cols() const { return m_matrix.cols() + (Direction==Horizontal ? 1 : 0); }
+
+    inline Scalar coeff(int row, int col) const
+    {
+      if(  (Direction==Vertical   && row==m_matrix.rows())
+        || (Direction==Horizontal && col==m_matrix.cols()))
+        return 1;
+      return m_matrix.coeff(row, col);
+    }
+
+    template<typename Rhs>
+    inline const ei_homogeneous_right_product_impl<Homogeneous,Rhs>
+    operator* (const MatrixBase<Rhs>& rhs) const
+    {
+      ei_assert(Direction==Horizontal);
+      return ei_homogeneous_right_product_impl<Homogeneous,Rhs>(m_matrix,rhs.derived());
+    }
+
+    template<typename Lhs> friend
+    inline const ei_homogeneous_left_product_impl<Homogeneous,Lhs>
+    operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
+    {
+      ei_assert(Direction==Vertical);
+      return ei_homogeneous_left_product_impl<Homogeneous,Lhs>(lhs.derived(),rhs.m_matrix);
+    }
+
+    template<typename Scalar, int Dim, int Mode> friend
+    inline const ei_homogeneous_left_product_impl<Homogeneous,
+      typename Transform<Scalar,Dim,Mode>::AffinePart>
+    operator* (const Transform<Scalar,Dim,Mode>& tr, const Homogeneous& rhs)
+    {
+      ei_assert(Direction==Vertical);
+      return ei_homogeneous_left_product_impl<Homogeneous,typename Transform<Scalar,Dim,Mode>::AffinePart>
+        (tr.affine(),rhs.m_matrix);
+    }
+
+    template<typename Scalar, int Dim> friend
+    inline const ei_homogeneous_left_product_impl<Homogeneous,
+      typename Transform<Scalar,Dim,Projective>::MatrixType>
+    operator* (const Transform<Scalar,Dim,Projective>& tr, const Homogeneous& rhs)
+    {
+      ei_assert(Direction==Vertical);
+      return ei_homogeneous_left_product_impl<Homogeneous,typename Transform<Scalar,Dim,Projective>::MatrixType>
+        (tr.matrix(),rhs.m_matrix);
+    }
+
+  protected:
+    const typename MatrixType::Nested m_matrix;
+};
+
+/** \geometry_module
+  * \nonstableyet
+  * \return an expression of the equivalent homogeneous vector
+  *
+  * \vectoronly
+  *
+  * Example: \include MatrixBase_homogeneous.cpp
+  * Output: \verbinclude MatrixBase_homogeneous.out
+  *
+  * \sa class Homogeneous
+  */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::HomogeneousReturnType
+MatrixBase<Derived>::homogeneous() const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  return derived();
+}
+
+/** \geometry_module
+  * \nonstableyet
+  * \returns a matrix expression of homogeneous column (or row) vectors
+  *
+  * Example: \include PartialRedux_homogeneous.cpp
+  * Output: \verbinclude PartialRedux_homogeneous.out
+  *
+  * \sa MatrixBase::homogeneous() */
+template<typename ExpressionType, int Direction>
+inline const Homogeneous<ExpressionType,Direction>
+PartialRedux<ExpressionType,Direction>::homogeneous() const
+{
+  return _expression();
+}
+
+/** \geometry_module
+  * \nonstableyet
+  * \returns an expression of the homogeneous normalized vector of \c *this
+  *
+  * Example: \include MatrixBase_hnormalized.cpp
+  * Output: \verbinclude MatrixBase_hnormalized.out
+  *
+  * \sa PartialRedux::hnormalized() */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::HNormalizedReturnType
+MatrixBase<Derived>::hnormalized() const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  return StartMinusOne(derived(),0,0,
+    ColsAtCompileTime==1?size()-1:1,
+    ColsAtCompileTime==1?1:size()-1).nestByValue() / coeff(size()-1);
+}
+
+/** \geometry_module
+  * \nonstableyet
+  * \returns an expression of \c *this where each column (or row) is homogeneous normalized, i.e. divided by its last coefficient
+  *
+  * Example: \include DirectionWise_hnormalized.cpp
+  * Output: \verbinclude DirectionWise_hnormalized.out
+  *
+  * \sa MatrixBase::hnormalized() */
+template<typename ExpressionType, int Direction>
+inline const typename PartialRedux<ExpressionType,Direction>::HNormalizedReturnType
+PartialRedux<ExpressionType,Direction>::hnormalized() const
+{
+  return HNormalized_Block(_expression(),0,0,
+      Direction==Vertical   ? _expression().rows()-1 : _expression().rows(),
+      Direction==Horizontal ? _expression().cols()-1 : _expression().cols()).nestByValue()
+    .cwise()/
+      Replicate<NestByValue<HNormalized_Factors>,
+                Direction==Vertical   ? HNormalized_SizeMinusOne : 1,
+                Direction==Horizontal ? HNormalized_SizeMinusOne : 1>
+        (HNormalized_Factors(_expression(),
+          Direction==Vertical    ? _expression().rows()-1:0,
+          Direction==Horizontal  ? _expression().cols()-1:0,
+          Direction==Vertical    ? 1 : _expression().rows(),
+          Direction==Horizontal  ? 1 : _expression().cols()).nestByValue(),
+         Direction==Vertical   ? _expression().rows()-1 : 1,
+         Direction==Horizontal ? _expression().cols()-1 : 1).nestByValue();
+}
+
+template<typename MatrixType,typename Lhs>
+struct ei_homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>
+  : public ReturnByValue<ei_homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>,
+                         Matrix<typename ei_traits<MatrixType>::Scalar,
+                                Lhs::RowsAtCompileTime,MatrixType::ColsAtCompileTime> >
+{
+  typedef typename ei_cleantype<typename Lhs::Nested>::type LhsNested;
+  ei_homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
+    : m_lhs(lhs), m_rhs(rhs)
+  {}
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    // FIXME investigate how to allow lazy evaluation of this product when possible
+    dst = Block<LhsNested,
+              LhsNested::RowsAtCompileTime,
+              LhsNested::ColsAtCompileTime==Dynamic?Dynamic:LhsNested::ColsAtCompileTime-1>
+            (m_lhs,0,0,m_lhs.rows(),m_lhs.cols()-1) * m_rhs;
+    dst += m_lhs.col(m_lhs.cols()-1).rowwise()
+            .template replicate<MatrixType::ColsAtCompileTime>(m_rhs.cols());
+  }
+
+  const typename Lhs::Nested m_lhs;
+  const typename MatrixType::Nested m_rhs;
+};
+
+template<typename MatrixType,typename Rhs>
+struct ei_homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
+  : public ReturnByValue<ei_homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>,
+                         Matrix<typename ei_traits<MatrixType>::Scalar,
+                                MatrixType::RowsAtCompileTime, Rhs::ColsAtCompileTime> >
+{
+  typedef typename ei_cleantype<typename Rhs::Nested>::type RhsNested;
+  ei_homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
+    : m_lhs(lhs), m_rhs(rhs)
+  {}
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    // FIXME investigate how to allow lazy evaluation of this product when possible
+    dst = m_lhs * Block<RhsNested,
+                        RhsNested::RowsAtCompileTime==Dynamic?Dynamic:RhsNested::RowsAtCompileTime-1,
+                        RhsNested::ColsAtCompileTime>
+            (m_rhs,0,0,m_rhs.rows()-1,m_rhs.cols());
+    dst += m_rhs.row(m_rhs.rows()-1).colwise()
+            .template replicate<MatrixType::RowsAtCompileTime>(m_lhs.rows());
+  }
+
+  const typename MatrixType::Nested m_lhs;
+  const typename Rhs::Nested m_rhs;
+
+};
+
+#endif // EIGEN_HOMOGENEOUS_H
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,7 +26,7 @@
 #ifndef EIGEN_HYPERPLANE_H
 #define EIGEN_HYPERPLANE_H

-/** \geometry_module \ingroup GeometryModule
+/** \geometry_module \ingroup Geometry_Module
  *
  * \class Hyperplane
  *
@@ -45,177 +45,224 @@
  */
 template <typename _Scalar, int _AmbientDim>
 class Hyperplane
-  #ifdef EIGEN_VECTORIZE
-  : public ei_with_aligned_operator_new<_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1>
-  #endif
 {
-  public:
+public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1)
+  enum { AmbientDimAtCompileTime = _AmbientDim };
+  typedef _Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
+  typedef Matrix<Scalar,AmbientDimAtCompileTime==Dynamic
+                        ? Dynamic
+                        : AmbientDimAtCompileTime+1,1> Coefficients;
+  typedef Block<Coefficients,AmbientDimAtCompileTime,1> NormalReturnType;

-    enum { AmbientDimAtCompileTime = _AmbientDim };
-    typedef _Scalar Scalar;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
-    typedef Matrix<Scalar,AmbientDimAtCompileTime==Dynamic
-                          ? Dynamic
-                          : AmbientDimAtCompileTime+1,1> Coefficients;
-    typedef Block<Coefficients,AmbientDimAtCompileTime,1> NormalReturnType;
+  /** Default constructor without initialization */
+  inline explicit Hyperplane() {}

-    /** Default constructor without initialization */
-    inline explicit Hyperplane(int _dim = AmbientDimAtCompileTime) : m_coeffs(_dim+1) {}
-    
-    /** Construct a plane from its normal \a n and a point \a e onto the plane.
-      * \warning the vector normal is assumed to be normalized.
-      */
-    inline Hyperplane(const VectorType& n, const VectorType e)
-      : m_coeffs(n.size()+1)
-    {
-      normal() = n;
-      offset() = -e.dot(n);
+  /** Constructs a dynamic-size hyperplane with \a _dim the dimension
+    * of the ambient space */
+  inline explicit Hyperplane(int _dim) : m_coeffs(_dim+1) {}
+
+  /** Construct a plane from its normal \a n and a point \a e onto the plane.
+    * \warning the vector normal is assumed to be normalized.
+    */
+  inline Hyperplane(const VectorType& n, const VectorType& e)
+    : m_coeffs(n.size()+1)
+  {
+    normal() = n;
+    offset() = -e.dot(n);
+  }
+
+  /** Constructs a plane from its normal \a n and distance to the origin \a d
+    * such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$.
+    * \warning the vector normal is assumed to be normalized.
+    */
+  inline Hyperplane(const VectorType& n, Scalar d)
+    : m_coeffs(n.size()+1)
+  {
+    normal() = n;
+    offset() = d;
+  }
+
+  /** Constructs a hyperplane passing through the two points. If the dimension of the ambient space
+    * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made.
+    */
+  static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
+  {
+    Hyperplane result(p0.size());
+    result.normal() = (p1 - p0).unitOrthogonal();
+    result.offset() = -result.normal().dot(p0);
+    return result;
+  }
+
+  /** Constructs a hyperplane passing through the three points. The dimension of the ambient space
+    * is required to be exactly 3.
+    */
+  static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
+  {
+    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
+    Hyperplane result(p0.size());
+    result.normal() = (p2 - p0).cross(p1 - p0).normalized();
+    result.offset() = -result.normal().dot(p0);
+    return result;
+  }
+
+  /** Constructs a hyperplane passing through the parametrized line \a parametrized.
+    * If the dimension of the ambient space is greater than 2, then there isn't uniqueness,
+    * so an arbitrary choice is made.
+    */
+  // FIXME to be consistent with the rest this could be implemented as a static Through function ??
+  explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
+  {
+    normal() = parametrized.direction().unitOrthogonal();
+    offset() = -normal().dot(parametrized.origin());
+  }
+
+  ~Hyperplane() {}
+
+  /** \returns the dimension in which the plane holds */
+  inline int dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : AmbientDimAtCompileTime; }
+
+  /** normalizes \c *this */
+  void normalize(void)
+  {
+    m_coeffs /= normal().norm();
+  }
+
+  /** \returns the signed distance between the plane \c *this and a point \a p.
+    * \sa absDistance()
+    */
+  inline Scalar signedDistance(const VectorType& p) const { return p.dot(normal()) + offset(); }
+
+  /** \returns the absolute distance between the plane \c *this and a point \a p.
+    * \sa signedDistance()
+    */
+  inline Scalar absDistance(const VectorType& p) const { return ei_abs(signedDistance(p)); }
+
+  /** \returns the projection of a point \a p onto the plane \c *this.
+    */
+  inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
+
+  /** \returns a constant reference to the unit normal vector of the plane, which corresponds
+    * to the linear part of the implicit equation.
+    */
+  inline const NormalReturnType normal() const { return NormalReturnType(m_coeffs,0,0,dim(),1); }
+
+  /** \returns a non-constant reference to the unit normal vector of the plane, which corresponds
+    * to the linear part of the implicit equation.
+    */
+  inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
+
+  /** \returns the distance to the origin, which is also the "constant term" of the implicit equation
+    * \warning the vector normal is assumed to be normalized.
+    */
+  inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
+
+  /** \returns a non-constant reference to the distance to the origin, which is also the constant part
+    * of the implicit equation */
+  inline Scalar& offset() { return m_coeffs(dim()); }
+
+  /** \returns a constant reference to the coefficients c_i of the plane equation:
+    * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
+    */
+  inline const Coefficients& coeffs() const { return m_coeffs; }
+
+  /** \returns a non-constant reference to the coefficients c_i of the plane equation:
+    * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
+    */
+  inline Coefficients& coeffs() { return m_coeffs; }
+
+  /** \returns the intersection of *this with \a other.
+    *
+    * \warning The ambient space must be a plane, i.e. have dimension 2, so that \c *this and \a other are lines.
+    *
+    * \note If \a other is approximately parallel to *this, this method will return any point on *this.
+    */
+  VectorType intersection(const Hyperplane& other)
+  {
+    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
+    Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
+    // since the line equations ax+by+c=0 are normalized with a^2+b^2=1, the following tests
+    // whether the two lines are approximately parallel.
+    if(ei_isMuchSmallerThan(det, Scalar(1)))
+    {   // special case where the two lines are approximately parallel. Pick any point on the first line.
+        if(ei_abs(coeffs().coeff(1))>ei_abs(coeffs().coeff(0)))
+            return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));
+        else
+            return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));
    }
-    
-    /** Constructs a plane from its normal \a n and distance to the origin \a d.
-      * \warning the vector normal is assumed to be normalized.
-      */
-    inline Hyperplane(const VectorType& n, Scalar d)
-      : m_coeffs(n.size()+1)
-    {
-      normal() = n;
-      offset() = d;
+    else
+    {   // general case
+        Scalar invdet = Scalar(1) / det;
+        return VectorType(invdet*(coeffs().coeff(1)*other.coeffs().coeff(2)-other.coeffs().coeff(1)*coeffs().coeff(2)),
+                          invdet*(other.coeffs().coeff(0)*coeffs().coeff(2)-coeffs().coeff(0)*other.coeffs().coeff(2)));
    }
+  }

-    /** Constructs a hyperplane passing through the two points. If the dimension of the ambient space
-      * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made.
-      */
-    static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
+  /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this.
+    *
+    * \param mat the Dim x Dim transformation matrix
+    * \param traits specifies whether the matrix \a mat represents an Isometry
+    *               or a more generic Affine transformation. The default is Affine.
+    */
+  template<typename XprType>
+  inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
+  {
+    if (traits==Affine)
+      normal() = mat.inverse().transpose() * normal();
+    else if (traits==Isometry)
+      normal() = mat * normal();
+    else
    {
-      Hyperplane result(p0.size());
-      result.normal() = (p1 - p0).unitOrthogonal();
-      result.offset() = -result.normal().dot(p0);
-      return result;
+      ei_assert(0 && "invalid traits value in Hyperplane::transform()"); // `0 &&` makes the assertion fail; the literal only supplies the message
    }
+    return *this;
+  }

-    /** Constructs a hyperplane passing through the three points. The dimension of the ambient space
-      * is required to be exactly 3.
-      */
-    static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3);
-      Hyperplane result(p0.size());
-      result.normal() = (p2 - p0).cross(p1 - p0).normalized();
-      result.offset() = -result.normal().dot(p0);
-      return result;
-    }
+  /** Applies the transformation \a t to \c *this and returns a reference to \c *this.
+    *
+    * \param t the transformation of dimension Dim
+    * \param traits specifies whether the transformation \a t represents an Isometry
+    *               or a more generic Affine transformation. The default is Affine.
+    *               Other kinds of transformations are not supported.
+    */
+  inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime>& t,
+                                TransformTraits traits = Affine)
+  {
+    transform(t.linear(), traits);
+    offset() -= t.translation().dot(normal());
+    return *this;
+  }

-    Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
-    {
-      normal() = parametrized.direction().unitOrthogonal();
-      offset() = -normal().dot(parametrized.origin());
-    }
-    
-    ~Hyperplane() {}
+  /** \returns \c *this with scalar type cast to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<Hyperplane,
+           Hyperplane<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
+  {
+    return typename ei_cast_return_type<Hyperplane,
+                    Hyperplane<NewScalarType,AmbientDimAtCompileTime> >::type(*this);
+  }

-    /** \returns the dimension in which the plane holds */
-    inline int dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : AmbientDimAtCompileTime; }
-    
-    /** normalizes \c *this */
-    void normalize(void)
-    {
-      m_coeffs /= normal().norm();
-    }
-    
-    /** \returns the signed distance between the plane \c *this and a point \a p.
-      */
-    inline Scalar signedDistance(const VectorType& p) const { return p.dot(normal()) + offset(); }
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime>& other)
+  { m_coeffs = other.coeffs().template cast<Scalar>(); }

-    /** \returns the absolute distance between the plane \c *this and a point \a p.
-      */
-    inline Scalar absDistance(const VectorType& p) const { return ei_abs(signedDistance(p)); }
-    
-    /** \returns the projection of a point \a p onto the plane \c *this.
-      */
-    inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
-
-    /** \returns a constant reference to the unit normal vector of the plane, which corresponds
-      * to the linear part of the implicit equation.
-      */
-    inline const NormalReturnType normal() const { return NormalReturnType(m_coeffs,0,0,dim(),1); }
-
-    /** \returns a non-constant reference to the unit normal vector of the plane, which corresponds
-      * to the linear part of the implicit equation.
-      */
-    inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
-
-    /** \returns the distance to the origin, which is also the "constant term" of the implicit equation
-      * \warning the vector normal is assumed to be normalized.
-      */
-    inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
-
-    /** \returns a non-constant reference to the distance to the origin, which is also the constant part
-      * of the implicit equation */
-    inline Scalar& offset() { return m_coeffs(dim()); }
-    
-    /** \returns a constant reference to the coefficients c_i of the plane equation:
-      * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
-      */
-    inline const Coefficients& coeffs() const { return m_coeffs; }
-
-    /** \returns a non-constant reference to the coefficients c_i of the plane equation:
-      * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
-      */
-    inline Coefficients& coeffs() { return m_coeffs; }
-
-    /** \returns the intersection of *this with \a other.
-      *
-      * \warning The ambient space must be a plane, i.e. have dimension 2, so that *this and \a other are lines.
-      *
-      * \note If \a other is approximately parallel to *this, this method will return any point on *this.
-      */
-    VectorType intersection(const Hyperplane& other)
-    {
-      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2);
-      Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
-      // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests
-      // whether the two lines are approximately parallel.
-      if(ei_isMuchSmallerThan(det, Scalar(1)))
-      {   // special case where the two lines are approximately parallel. Pick any point on the first line.
-          if(ei_abs(coeffs().coeff(1))>ei_abs(coeffs().coeff(0)))
-              return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));
-          else
-              return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));
-      }
-      else
-      {   // general case
-          Scalar invdet = Scalar(1) / det;
-          return VectorType(invdet*(coeffs().coeff(1)*other.coeffs().coeff(2)-other.coeffs().coeff(1)*coeffs().coeff(2)),
-                            invdet*(other.coeffs().coeff(0)*coeffs().coeff(2)-coeffs().coeff(0)*other.coeffs().coeff(2)));
-      }
-    }
-    
-    template<typename XprType>
-    inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
-    {
-      if (traits==Affine)
-        normal() = mat.inverse().transpose() * normal();
-      else if (traits==Isometry)
-        normal() = mat * normal();
-      else
-      {
-        ei_assert("invalid traits value in Hyperplane::transform()");
-      }
-      return *this;
-    }
-
-    inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime>& t,
-                                 TransformTraits traits = Affine)
-    {
-      transform(t.linear(), traits);
-      offset() -= t.translation().dot(normal());
-      return *this;
-    }
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const Hyperplane& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return m_coeffs.isApprox(other.m_coeffs, prec); }

 protected:

-    Coefficients m_coeffs;
+  Coefficients m_coeffs;
 };

 #endif // EIGEN_HYPERPLANE_H
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -1,8 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -27,34 +27,141 @@
 #define EIGEN_ORTHOMETHODS_H

 /** \geometry_module
-  * \returns the cross product of \c *this and \a other */
+  *
+  * \returns the cross product of \c *this and \a other
+  *
+  * Here is a very good explanation of cross-product: http://xkcd.com/199/
+  * \sa MatrixBase::cross3()
+  */
 template<typename Derived>
 template<typename OtherDerived>
-inline typename MatrixBase<Derived>::EvalType
+inline typename MatrixBase<Derived>::PlainMatrixType
 MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3);
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)

  // Note that there is no need for an expression here since the compiler
  // optimize such a small temporary very well (even within a complex expression)
  const typename ei_nested<Derived,2>::type lhs(derived());
  const typename ei_nested<OtherDerived,2>::type rhs(other.derived());
-  return typename ei_eval<Derived>::type(
+  return typename ei_plain_matrix_type<Derived>::type(
    lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1),
    lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2),
    lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0)
  );
 }

+template< int Arch,typename VectorLhs,typename VectorRhs,
+          typename Scalar = typename VectorLhs::Scalar,
+          int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
+struct ei_cross3_impl {
+  inline static typename ei_plain_matrix_type<VectorLhs>::type
+  run(const VectorLhs& lhs, const VectorRhs& rhs)
+  {
+    return typename ei_plain_matrix_type<VectorLhs>::type(
+      lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1),
+      lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2),
+      lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0),
+      0
+    );
+  }
+};
+
+/** \geometry_module
+  *
+  * \returns the cross product of \c *this and \a other using only the x, y, and z coefficients
+  *
+  * The size of \c *this and \a other must be four. This function is especially useful
+  * when using 4D vectors instead of 3D ones to take advantage of SSE/AltiVec vectorization.
+  *
+  * \sa MatrixBase::cross()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline typename MatrixBase<Derived>::PlainMatrixType
+MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4)
+
+  typedef typename ei_nested<Derived,2>::type DerivedNested;
+  typedef typename ei_nested<OtherDerived,2>::type OtherDerivedNested;
+  const DerivedNested lhs(derived());
+  const OtherDerivedNested rhs(other.derived());
+
+  return ei_cross3_impl<EiArch,typename ei_cleantype<DerivedNested>::type,
+                               typename ei_cleantype<OtherDerivedNested>::type>::run(lhs,rhs);
+}
+
+/** \returns a matrix expression of the cross product of each column or row
+  * of the referenced expression with the \a other vector.
+  *
+  * The referenced matrix must have one dimension equal to 3.
+  * The result matrix has the same dimensions as the referenced one.
+  *
+  * \geometry_module
+  *
+  * \sa MatrixBase::cross() */
+template<typename ExpressionType, int Direction>
+template<typename OtherDerived>
+const typename PartialRedux<ExpressionType,Direction>::CrossReturnType
+PartialRedux<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
+  EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret),
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+  CrossReturnType res(_expression().rows(),_expression().cols());
+  if(Direction==Vertical)
+  {
+    ei_assert(CrossReturnType::RowsAtCompileTime==3 && "the matrix must have exactly 3 rows");
+    res.row(0) = _expression().row(1) * other.coeff(2) - _expression().row(2) * other.coeff(1);
+    res.row(1) = _expression().row(2) * other.coeff(0) - _expression().row(0) * other.coeff(2);
+    res.row(2) = _expression().row(0) * other.coeff(1) - _expression().row(1) * other.coeff(0);
+  }
+  else
+  {
+    ei_assert(CrossReturnType::ColsAtCompileTime==3 && "the matrix must have exactly 3 columns");
+    res.col(0) = _expression().col(1) * other.coeff(2) - _expression().col(2) * other.coeff(1);
+    res.col(1) = _expression().col(2) * other.coeff(0) - _expression().col(0) * other.coeff(2);
+    res.col(2) = _expression().col(0) * other.coeff(1) - _expression().col(1) * other.coeff(0);
+  }
+  return res;
+}
+
 template<typename Derived, int Size = Derived::SizeAtCompileTime>
 struct ei_unitOrthogonal_selector
 {
-  typedef typename ei_eval<Derived>::type VectorType;
+  typedef typename ei_plain_matrix_type<Derived>::type VectorType;
+  typedef typename ei_traits<Derived>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Matrix<Scalar,2,1> Vector2;
+  inline static VectorType run(const Derived& src)
+  {
+    VectorType perp = VectorType::Zero(src.size());
+    int maxi = 0;
+    int sndi = 0;
+    src.cwise().abs().maxCoeff(&maxi);
+    if (maxi==0)
+      sndi = 1;
+    RealScalar invnm = RealScalar(1)/(Vector2() << src.coeff(sndi),src.coeff(maxi)).finished().norm();
+    perp.coeffRef(maxi) = -ei_conj(src.coeff(sndi)) * invnm;
+    perp.coeffRef(sndi) =  ei_conj(src.coeff(maxi)) * invnm;
+
+    return perp;
+   }
+};
+
+template<typename Derived>
+struct ei_unitOrthogonal_selector<Derived,3>
+{
+  typedef typename ei_plain_matrix_type<Derived>::type VectorType;
  typedef typename ei_traits<Derived>::Scalar Scalar;
  typedef typename NumTraits<Scalar>::Real RealScalar;
  inline static VectorType run(const Derived& src)
  {
-    VectorType perp(src.size());
+    VectorType perp;
    /* Let us compute the crossed product of *this with a vector
     * that is not too close to being colinear to *this.
     */
@@ -81,9 +188,6 @@ struct ei_unitOrthogonal_selector
      perp.coeffRef(1) = -ei_conj(src.z())*invnm;
      perp.coeffRef(2) = ei_conj(src.y())*invnm;
    }
-    if( (Derived::SizeAtCompileTime!=Dynamic && Derived::SizeAtCompileTime>3)
-     || (Derived::SizeAtCompileTime==Dynamic && src.size()>3) )
-      perp.end(src.size()-3).setZero();

    return perp;
   }
@@ -92,7 +196,7 @@ struct ei_unitOrthogonal_selector
 template<typename Derived>
 struct ei_unitOrthogonal_selector<Derived,2>
 {
-  typedef typename ei_eval<Derived>::type VectorType;
+  typedef typename ei_plain_matrix_type<Derived>::type VectorType;
  inline static VectorType run(const Derived& src)
  { return VectorType(-ei_conj(src.y()), ei_conj(src.x())).normalized(); }
 };
@@ -105,10 +209,10 @@ struct ei_unitOrthogonal_selector<Derived,2>
  * \sa cross()
  */
 template<typename Derived>
-typename MatrixBase<Derived>::EvalType
+typename MatrixBase<Derived>::PlainMatrixType
 MatrixBase<Derived>::unitOrthogonal() const
 {
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return ei_unitOrthogonal_selector<Derived>::run(derived());
 }

--- a/Eigen/src/Geometry/ParametrizedLine.h
+++ b/Eigen/src/Geometry/ParametrizedLine.h
@@ -2,7 +2,7 @@
 // for linear algebra. Eigen itself is part of the KDE project.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,87 +26,124 @@
 #ifndef EIGEN_PARAMETRIZEDLINE_H
 #define EIGEN_PARAMETRIZEDLINE_H

-/** \geometry_module \ingroup GeometryModule
+/** \geometry_module \ingroup Geometry_Module
  *
  * \class ParametrizedLine
  *
  * \brief A parametrized line
  *
+  * A parametrized line is defined by an origin point \f$ \mathbf{o} \f$ and a unit
+  * direction vector \f$ \mathbf{d} \f$ such that the line corresponds to
+  * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$.
+  *
  * \param _Scalar the scalar type, i.e., the type of the coefficients
  * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
-  *             Notice that the dimension of the hyperplane is _AmbientDim-1.
  */
 template <typename _Scalar, int _AmbientDim>
 class ParametrizedLine
-  #ifdef EIGEN_VECTORIZE
-  : public ei_with_aligned_operator_new<_Scalar,_AmbientDim>
-  #endif
 {
-  public:
+public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
+  enum { AmbientDimAtCompileTime = _AmbientDim };
+  typedef _Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;

-    enum { AmbientDimAtCompileTime = _AmbientDim };
-    typedef _Scalar Scalar;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
+  /** Default constructor without initialization */
+  inline explicit ParametrizedLine() {}

-    /** Default constructor without initialization */
-    inline explicit ParametrizedLine(int _dim = AmbientDimAtCompileTime)
-      : m_origin(_dim), m_direction(_dim)
-    {}
-    
-    ParametrizedLine(const VectorType& origin, const VectorType& direction)
-      : m_origin(origin), m_direction(direction) {}
-    explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim>& hyperplane);
+  /** Constructs a dynamic-size line with \a _dim the dimension
+    * of the ambient space */
+  inline explicit ParametrizedLine(int _dim) : m_origin(_dim), m_direction(_dim) {}

-    ~ParametrizedLine() {}
+  /** Initializes a parametrized line of direction \a direction and origin \a origin.
+    * \warning the vector direction is assumed to be normalized.
+    */
+  ParametrizedLine(const VectorType& origin, const VectorType& direction)
+    : m_origin(origin), m_direction(direction) {}

-    /** \returns the dimension in which the line holds */
-    inline int dim() const { return m_direction.size(); }
+  explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim>& hyperplane);

-    const VectorType& origin() const { return m_origin; }
-    VectorType& origin() { return m_origin; }
+  /** Constructs a parametrized line going from \a p0 to \a p1. */
+  static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
+  { return ParametrizedLine(p0, (p1-p0).normalized()); }

-    const VectorType& direction() const { return m_direction; }
-    VectorType& direction() { return m_direction; }
+  ~ParametrizedLine() {}

-    /** \returns the squared distance of a point \a p to its projection onto the line \c *this.
-      * \sa distance()
-      */
-    RealScalar squaredDistance(const VectorType& p) const
-    {
-      VectorType diff = p-origin();
-      return (diff - diff.dot(direction())* direction()).norm2();
-    }
-    /** \returns the distance of a point \a p to its projection onto the line \c *this.
-      * \sa squaredDistance()
-      */
-    RealScalar distance(const VectorType& p) const { return ei_sqrt(squaredDistance(p)); }
+  /** \returns the dimension in which the line holds */
+  inline int dim() const { return m_direction.size(); }

-    /** \returns the projection of a point \a p onto the line \c *this.
-      */
-    VectorType projection(const VectorType& p) const
-    { return origin() + (p-origin()).dot(direction()) * direction(); }
+  const VectorType& origin() const { return m_origin; }
+  VectorType& origin() { return m_origin; }

-    Scalar intersection(const Hyperplane<_Scalar, _AmbientDim>& hyperplane);
+  const VectorType& direction() const { return m_direction; }
+  VectorType& direction() { return m_direction; }

-  protected:
+  /** \returns the squared distance of a point \a p to its projection onto the line \c *this.
+    * \sa distance()
+    */
+  RealScalar squaredDistance(const VectorType& p) const
+  {
+    VectorType diff = p-origin();
+    return (diff - diff.dot(direction())* direction()).squaredNorm();
+  }
+  /** \returns the distance of a point \a p to its projection onto the line \c *this.
+    * \sa squaredDistance()
+    */
+  RealScalar distance(const VectorType& p) const { return ei_sqrt(squaredDistance(p)); }

-    VectorType m_origin, m_direction;
+  /** \returns the projection of a point \a p onto the line \c *this. */
+  VectorType projection(const VectorType& p) const
+  { return origin() + (p-origin()).dot(direction()) * direction(); }
+
+  Scalar intersection(const Hyperplane<_Scalar, _AmbientDim>& hyperplane);
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<ParametrizedLine,
+           ParametrizedLine<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
+  {
+    return typename ei_cast_return_type<ParametrizedLine,
+                    ParametrizedLine<NewScalarType,AmbientDimAtCompileTime> >::type(*this);
+  }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime>& other)
+  {
+    m_origin = other.origin().template cast<Scalar>();
+    m_direction = other.direction().template cast<Scalar>();
+  }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const ParametrizedLine& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
+
+protected:
+
+  VectorType m_origin, m_direction;
 };

-/** Construct a parametrized line from a 2D hyperplane
+/** Constructs a parametrized line from a 2D hyperplane
  *
  * \warning the ambient space must have dimension 2 such that the hyperplane actually describes a line
  */
 template <typename _Scalar, int _AmbientDim>
 inline ParametrizedLine<_Scalar, _AmbientDim>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim>& hyperplane)
 {
-  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2);
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
  direction() = hyperplane.normal().unitOrthogonal();
  origin() = -hyperplane.normal()*hyperplane.offset();
 }

-/** \returns the parameter value of the intersection between *this and the given hyperplane
+/** \returns the parameter value of the intersection between \c *this and the given hyperplane
  */
 template <typename _Scalar, int _AmbientDim>
 inline _Scalar ParametrizedLine<_Scalar, _AmbientDim>::intersection(const Hyperplane<_Scalar, _AmbientDim>& hyperplane)
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -30,7 +30,7 @@ template<typename Other,
         int OtherCols=Other::ColsAtCompileTime>
 struct ei_quaternion_assign_impl;

-/** \geometry_module \ingroup GeometryModule
+/** \geometry_module \ingroup Geometry_Module
  *
  * \class Quaternion
  *
@@ -59,21 +59,21 @@ template<typename _Scalar> struct ei_traits<Quaternion<_Scalar> >

 template<typename _Scalar>
 class Quaternion : public RotationBase<Quaternion<_Scalar>,3>
-  #ifdef EIGEN_VECTORIZE
-  , public ei_with_aligned_operator_new<_Scalar,4>
-  #endif
 {
  typedef RotationBase<Quaternion<_Scalar>,3> Base;
-  typedef Matrix<_Scalar, 4, 1> Coefficients;
-  Coefficients m_coeffs;
+
+

 public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,4)

  using Base::operator*;

  /** the scalar type of the coefficients */
  typedef _Scalar Scalar;

+  /** the type of the Coefficients 4-vector */
+  typedef Matrix<Scalar, 4, 1> Coefficients;
  /** the type of a 3D vector */
  typedef Matrix<Scalar,3,1> Vector3;
  /** the equivalent rotation matrix type */
@@ -111,13 +111,12 @@ public:
  /** \returns a vector expression of the coefficients (x,y,z,w) */
  inline Coefficients& coeffs() { return m_coeffs; }

-  /** Default constructor and initializing an identity quaternion. */
-  inline Quaternion()
-  { m_coeffs << 0, 0, 0, 1; }
+  /** Default constructor leaving the quaternion uninitialized. */
+  inline Quaternion() {}

  /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
    * its four coefficients \a w, \a x, \a y and \a z.
-    * 
+    *
    * \warning Note the order of the arguments: the real \a w coefficient first,
    * while internally the coefficients are stored in the following order:
    * [\c x, \c y, \c z, \c w]
@@ -151,24 +150,24 @@ public:

  /** \sa Quaternion::Identity(), MatrixBase::setIdentity()
    */
-  inline Quaternion& setIdentity() { m_coeffs << 1, 0, 0, 0; return *this; }
+  inline Quaternion& setIdentity() { m_coeffs << 0, 0, 0, 1; return *this; }

  /** \returns the squared norm of the quaternion's coefficients
-    * \sa Quaternion::norm(), MatrixBase::norm2()
+    * \sa Quaternion::norm(), MatrixBase::squaredNorm()
    */
-  inline Scalar norm2() const { return m_coeffs.norm2(); }
+  inline Scalar squaredNorm() const { return m_coeffs.squaredNorm(); }

  /** \returns the norm of the quaternion's coefficients
-    * \sa Quaternion::norm2(), MatrixBase::norm()
+    * \sa Quaternion::squaredNorm(), MatrixBase::norm()
    */
  inline Scalar norm() const { return m_coeffs.norm(); }
-  
-  /** Normalizes the quaternion \c *this 
+
+  /** Normalizes the quaternion \c *this
    * \sa normalized(), MatrixBase::normalize() */
-  inline void normalize() { m_coeffs.normalize(); } 
+  inline void normalize() { m_coeffs.normalize(); }
  /** \returns a normalized version of \c *this
    * \sa normalize(), MatrixBase::normalized() */
-  inline Quaternion normalized() const { Quaternion(m_coeffs.normalized()); } 
+  inline Quaternion normalized() const { return Quaternion(m_coeffs.normalized()); }

  /** \returns the dot product of \c *this and \a other
    * Geometrically speaking, the dot product of two unit quaternions
@@ -192,29 +191,58 @@ public:

  Quaternion slerp(Scalar t, const Quaternion& other) const;

-  template<typename Derived>
-  Vector3 operator* (const MatrixBase<Derived>& vec) const;
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<Quaternion,Quaternion<NewScalarType> >::type cast() const
+  { return typename ei_cast_return_type<Quaternion,Quaternion<NewScalarType> >::type(*this); }

+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit Quaternion(const Quaternion<OtherScalarType>& other)
+  { m_coeffs = other.coeffs().template cast<Scalar>(); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const Quaternion& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return m_coeffs.isApprox(other.m_coeffs, prec); }
+
+  Vector3 _transformVector(Vector3 v) const;
+
+protected:
+  Coefficients m_coeffs;
 };

-/** \ingroup GeometryModule
+/** \ingroup Geometry_Module
  * single precision quaternion type */
 typedef Quaternion<float> Quaternionf;
-/** \ingroup GeometryModule
+/** \ingroup Geometry_Module
  * double precision quaternion type */
 typedef Quaternion<double> Quaterniond;

+// Generic Quaternion * Quaternion product
+template<int Arch,typename Scalar> inline Quaternion<Scalar>
+ei_quaternion_product(const Quaternion<Scalar>& a, const Quaternion<Scalar>& b)
+{
+  return Quaternion<Scalar>
+  (
+    a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
+    a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(),
+    a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(),
+    a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x()
+  );
+}
+
 /** \returns the concatenation of two rotations as a quaternion-quaternion product */
 template <typename Scalar>
 inline Quaternion<Scalar> Quaternion<Scalar>::operator* (const Quaternion& other) const
 {
-  return Quaternion
-  (
-    this->w() * other.w() - this->x() * other.x() - this->y() * other.y() - this->z() * other.z(),
-    this->w() * other.x() + this->x() * other.w() + this->y() * other.z() - this->z() * other.y(),
-    this->w() * other.y() + this->y() * other.w() + this->z() * other.x() - this->x() * other.z(),
-    this->w() * other.z() + this->z() * other.w() + this->x() * other.y() - this->y() * other.x()
-  );
+  return ei_quaternion_product<EiArch>(*this,other);
 }

 /** \sa operator*(Quaternion) */
@@ -232,17 +260,15 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::operator*= (const Quaternion& oth
  *   - Via a Matrix3: 24 + 15n
  */
 template <typename Scalar>
-template<typename Derived>
 inline typename Quaternion<Scalar>::Vector3
-Quaternion<Scalar>::operator* (const MatrixBase<Derived>& v) const
+Quaternion<Scalar>::_transformVector(Vector3 v) const
 {
    // Note that this algorithm comes from the optimization by hand
    // of the conversion to a Matrix followed by a Matrix/Vector product.
    // It appears to be much faster than the common algorithm found
    // in the litterature (30 versus 39 flops). It also requires two
    // Vector3 as temporaries.
-    Vector3 uv;
-    uv = 2 * this->vec().cross(v);
+    Vector3 uv = Scalar(2) * this->vec().cross(v);
    return v + this->w() * uv + this->vec().cross(uv);
 }

@@ -258,7 +284,7 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::operator=(const Quaternion& other
 template<typename Scalar>
 inline Quaternion<Scalar>& Quaternion<Scalar>::operator=(const AngleAxisType& aa)
 {
-  Scalar ha = 0.5*aa.angle();
+  Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
  this->w() = ei_cos(ha);
  this->vec() = ei_sin(ha) * aa.axis();
  return *this;
@@ -288,18 +314,18 @@ Quaternion<Scalar>::toRotationMatrix(void) const
  // it has to be inlined, and so the return by value is not an issue
  Matrix3 res;

-  Scalar tx  = 2*this->x();
-  Scalar ty  = 2*this->y();
-  Scalar tz  = 2*this->z();
-  Scalar twx = tx*this->w();
-  Scalar twy = ty*this->w();
-  Scalar twz = tz*this->w();
-  Scalar txx = tx*this->x();
-  Scalar txy = ty*this->x();
-  Scalar txz = tz*this->x();
-  Scalar tyy = ty*this->y();
-  Scalar tyz = tz*this->y();
-  Scalar tzz = tz*this->z();
+  const Scalar tx  = 2*this->x();
+  const Scalar ty  = 2*this->y();
+  const Scalar tz  = 2*this->z();
+  const Scalar twx = tx*this->w();
+  const Scalar twy = ty*this->w();
+  const Scalar twz = tz*this->w();
+  const Scalar txx = tx*this->x();
+  const Scalar txy = ty*this->x();
+  const Scalar txz = tz*this->x();
+  const Scalar tyy = ty*this->y();
+  const Scalar tyz = tz*this->y();
+  const Scalar tzz = tz*this->z();

  res.coeffRef(0,0) = 1-(tyy+tzz);
  res.coeffRef(0,1) = txy-twz;
@@ -314,9 +340,11 @@ Quaternion<Scalar>::toRotationMatrix(void) const
  return res;
 }

-/** Makes a quaternion representing the rotation between two vectors \a a and \a b.
-  * \returns a reference to the actual quaternion
-  * Note that the two input vectors have \b not to be normalized.
+/** Sets *this to be a quaternion representing a rotation sending the vector \a a to the vector \a b.
+  *
+  * \returns a reference to *this.
+  *
+  * Note that the two input vectors do \b not have to be normalized.
  */
 template<typename Scalar>
 template<typename Derived1, typename Derived2>
@@ -324,7 +352,6 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::setFromTwoVectors(const MatrixBas
 {
  Vector3 v0 = a.normalized();
  Vector3 v1 = b.normalized();
-  Vector3 axis = v0.cross(v1);
  Scalar c = v0.dot(v1);

  // if dot == 1, vectors are the same
@@ -332,11 +359,21 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::setFromTwoVectors(const MatrixBas
  {
    // set to identity
    this->w() = 1; this->vec().setZero();
+    return *this;
  }
-  Scalar s = ei_sqrt((1+c)*2);
-  Scalar invs = 1./s;
+  // if dot == -1, vectors are opposites
+  if (ei_isApprox(c,Scalar(-1)))
+  {
+    this->vec() = v0.unitOrthogonal();
+    this->w() = 0;
+    return *this;
+  }
+
+  Vector3 axis = v0.cross(v1);
+  Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2));
+  Scalar invs = Scalar(1)/s;
  this->vec() = axis * invs;
-  this->w() = s * 0.5;
+  this->w() = s * Scalar(0.5);

  return *this;
 }
@@ -351,7 +388,7 @@ template <typename Scalar>
 inline Quaternion<Scalar> Quaternion<Scalar>::inverse() const
 {
  // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
-  Scalar n2 = this->norm2();
+  Scalar n2 = this->squaredNorm();
  if (n2 > 0)
    return Quaternion(conjugate().coeffs() / n2);
  else
@@ -382,7 +419,7 @@ inline Scalar Quaternion<Scalar>::angularDistance(const Quaternion& other) const
  double d = ei_abs(this->dot(other));
  if (d>=1.0)
    return 0;
-  return 2.0 * std::acos(d);
+  return Scalar(2) * std::acos(d);
 }

 /** \returns the spherical linear interpolation between the two quaternions
@@ -438,9 +475,9 @@ struct ei_quaternion_assign_impl<Other,3,3>
      int j = (i+1)%3;
      int k = (j+1)%3;

-      t = ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + 1.0);
-      q.coeffs().coeffRef(i) = 0.5 * t;
-      t = 0.5/t;
+      t = ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0));
+      q.coeffs().coeffRef(i) = Scalar(0.5) * t;
+      t = Scalar(0.5)/t;
      q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t;
      q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t;
      q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t;
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -25,7 +25,7 @@
 #ifndef EIGEN_ROTATION2D_H
 #define EIGEN_ROTATION2D_H

-/** \geometry_module \ingroup GeometryModule
+/** \geometry_module \ingroup Geometry_Module
  *
  * \class Rotation2D
  *
@@ -100,12 +100,35 @@ public:
    */
  inline Rotation2D slerp(Scalar t, const Rotation2D& other) const
  { return m_angle * (1-t) + other.angle() * t; }
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
+  { return typename ei_cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type(*this); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
+  {
+    m_angle = Scalar(other.angle());
+  }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const Rotation2D& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return ei_isApprox(m_angle,other.m_angle, prec); }
 };

-/** \ingroup GeometryModule
+/** \ingroup Geometry_Module
  * single precision 2D rotation type */
 typedef Rotation2D<float> Rotation2Df;
-/** \ingroup GeometryModule
+/** \ingroup Geometry_Module
  * double precision 2D rotation type */
 typedef Rotation2D<double> Rotation2Dd;

@@ -117,7 +140,7 @@ template<typename Scalar>
 template<typename Derived>
 Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
 {
-  EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,you_did_a_programming_error);
+  EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
  m_angle = ei_atan2(mat.coeff(1,0), mat.coeff(0,0));
  return *this;
 }
--- a/Eigen/src/Geometry/RotationBase.h
+++ b/Eigen/src/Geometry/RotationBase.h
@@ -42,10 +42,31 @@ class RotationBase
    enum { Dim = _Dim };
    /** the scalar type of the coefficients */
    typedef typename ei_traits<Derived>::Scalar Scalar;
-    
+
    /** corresponding linear transformation matrix type */
    typedef Matrix<Scalar,Dim,Dim> RotationMatrixType;
+    typedef Matrix<Scalar,Dim,1> VectorType;

+  protected:
+    template<typename MatrixType, bool IsVector=MatrixType::IsVectorAtCompileTime>
+    struct generic_product_selector
+    {
+      typedef RotationMatrixType ReturnType;
+      inline static RotationMatrixType run(const Derived& r, const MatrixType& m)
+      { return r.toRotationMatrix() * m; }
+    };
+
+    template<typename OtherVectorType>
+    struct generic_product_selector<OtherVectorType,true>
+    {
+      typedef VectorType ReturnType;
+      inline static VectorType run(const Derived& r, const OtherVectorType& v)
+      {
+        return r._transformVector(v);
+      }
+    };
+
+  public:
    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
    inline Derived& derived() { return *static_cast<Derived*>(this); }

@@ -59,13 +80,33 @@ class RotationBase
    inline Transform<Scalar,Dim> operator*(const Translation<Scalar,Dim>& t) const
    { return toRotationMatrix() * t; }

-    /** \returns the concatenation of the rotation \c *this with a scaling \a s */
-    inline RotationMatrixType operator*(const Scaling<Scalar,Dim>& s) const
-    { return toRotationMatrix() * s; }
+    /** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
+    inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
+    { return toRotationMatrix() * s.factor(); }

-    /** \returns the concatenation of the rotation \c *this with an affine transformation \a t */
-    inline Transform<Scalar,Dim> operator*(const Transform<Scalar,Dim>& t) const
+    /** \returns the concatenation of the rotation \c *this with a generic expression \a e
+      * \a e can be:
+      *  - a DimxDim linear transformation matrix (including an axis aligned scaling)
+      *  - a vector of size Dim
+      */
+    template<typename OtherDerived>
+    inline typename generic_product_selector<OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
+    operator*(const MatrixBase<OtherDerived>& e) const
+    { return generic_product_selector<OtherDerived>::run(derived(), e.derived()); }
+
+    /** \returns the concatenation of a linear transformation \a l with the rotation \a r */
+    template<typename OtherDerived> friend
+    inline RotationMatrixType operator*(const MatrixBase<OtherDerived>& l, const Derived& r)
+    { return l.derived() * r.toRotationMatrix(); }
+
+    /** \returns the concatenation of the rotation \c *this with a transformation \a t */
+    template<int Mode>
+    inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode>& t) const
    { return toRotationMatrix() * t; }
+
+    template<typename OtherVectorType>
+    inline VectorType _transformVector(const OtherVectorType& v) const
+    { return toRotationMatrix() * v; }
 };

 /** \geometry_module
@@ -77,7 +118,7 @@ template<typename OtherDerived>
 Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
 ::Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
 {
-  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim));
+  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
  *this = r.toRotationMatrix();
 }

@@ -91,7 +132,7 @@ Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
 Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
 ::operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
 {
-  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim));
+  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
  return *this = r.toRotationMatrix();
 }

@@ -116,7 +157,7 @@ Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
 template<typename Scalar, int Dim>
 inline static Matrix<Scalar,2,2> ei_toRotationMatrix(const Scalar& s)
 {
-  EIGEN_STATIC_ASSERT(Dim==2,you_did_a_programming_error);
+  EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
  return Rotation2D<Scalar>(s).toRotationMatrix();
 }

@@ -130,7 +171,7 @@ template<typename Scalar, int Dim, typename OtherDerived>
 inline static const MatrixBase<OtherDerived>& ei_toRotationMatrix(const MatrixBase<OtherDerived>& mat)
 {
  EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim,
-    you_did_a_programming_error);
+    YOU_MADE_A_PROGRAMMING_MISTAKE)
  return mat;
 }

--- a/Eigen/src/Geometry/Scaling.h
+++ b/Eigen/src/Geometry/Scaling.h
@@ -25,138 +25,135 @@
 #ifndef EIGEN_SCALING_H
 #define EIGEN_SCALING_H

-/** \geometry_module \ingroup GeometryModule
+/** \geometry_module \ingroup Geometry_Module
  *
  * \class Scaling
  *
-  * \brief Represents a possibly non uniform scaling transformation
+  * \brief Represents a generic uniform scaling transformation
  *
  * \param _Scalar the scalar type, i.e., the type of the coefficients.
-  * \param _Dim the  dimension of the space, can be a compile time value or Dynamic
  *
-  * \note This class is not aimed to be used to store a scaling transformation,
-  * but rather to make easier the constructions and updates of Transformation object.
+  * This class represents a uniform scaling transformation. It is the return
+  * type of Scaling(Scalar), and most of the time this is the only way it
+  * is used. In particular, this class is not aimed to be used to store a scaling transformation,
+  * but rather to make easier the constructions and updates of Transform objects.
  *
-  * \sa class Translation, class Transform
+  * To represent an axis aligned scaling, use the DiagonalMatrix class.
+  *
+  * \sa Scaling(), class DiagonalMatrix, MatrixBase::asDiagonal(), class Translation, class Transform
  */
-template<typename _Scalar, int _Dim>
-class Scaling
-  #ifdef EIGEN_VECTORIZE
-  : public ei_with_aligned_operator_new<_Scalar,_Dim>
-  #endif
+template<typename _Scalar>
+class UniformScaling
 {
 public:
-  /** dimension of the space */
-  enum { Dim = _Dim };
  /** the scalar type of the coefficients */
  typedef _Scalar Scalar;
-  /** corresponding vector type */
-  typedef Matrix<Scalar,Dim,1> VectorType;
-  /** corresponding linear transformation matrix type */
-  typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
-  /** corresponding translation type */
-  typedef Translation<Scalar,Dim> TranslationType;
-  /** corresponding affine transformation type */
-  typedef Transform<Scalar,Dim> TransformType;

 protected:

-  VectorType m_coeffs;
+  Scalar m_factor;

 public:

  /** Default constructor without initialization. */
-  Scaling() {}
+  UniformScaling() {}
  /** Constructs and initialize a uniform scaling transformation */
-  explicit inline Scaling(const Scalar& s) { m_coeffs.setConstant(s); }
-  /** 2D only */
-  inline Scaling(const Scalar& sx, const Scalar& sy)
-  {
-    ei_assert(Dim==2);
-    m_coeffs.x() = sx;
-    m_coeffs.y() = sy;
-  }
-  /** 3D only */
-  inline Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)
-  {
-    ei_assert(Dim==3);
-    m_coeffs.x() = sx;
-    m_coeffs.y() = sy;
-    m_coeffs.z() = sz;
-  }
-  /** Constructs and initialize the scaling transformation from a vector of scaling coefficients */
-  explicit inline Scaling(const VectorType& coeffs) : m_coeffs(coeffs) {}
+  explicit inline UniformScaling(const Scalar& s) : m_factor(s) {}

-  const VectorType& coeffs() const { return m_coeffs; }
-  VectorType& coeffs() { return m_coeffs; }
+  inline const Scalar& factor() const { return m_factor; }
+  inline Scalar& factor() { return m_factor; }

-  /** Concatenates two scaling */
-  inline Scaling operator* (const Scaling& other) const
-  { return Scaling(coeffs().cwise() * other.coeffs()); }
+  /** Concatenates two uniform scalings */
+  inline UniformScaling operator* (const UniformScaling& other) const
+  { return UniformScaling(m_factor * other.factor()); }

-  /** Concatenates a scaling and a translation */
-  inline TransformType operator* (const TranslationType& t) const;
+  /** Concatenates a uniform scaling and a translation */
+  template<int Dim>
+  inline Transform<Scalar,Dim> operator* (const Translation<Scalar,Dim>& t) const;

-  /** Concatenates a scaling and an affine transformation */
-  inline TransformType operator* (const TransformType& t) const;
+  /** Concatenates a uniform scaling and an affine transformation */
+  template<int Dim>
+  inline Transform<Scalar,Dim> operator* (const Transform<Scalar,Dim>& t) const;

-  /** Concatenates a scaling and a linear transformation matrix */
+  /** Concatenates a uniform scaling and a linear transformation matrix */
  // TODO returns an expression
-  inline LinearMatrixType operator* (const LinearMatrixType& other) const
-  { return coeffs().asDiagonal() * other; }
-
-  /** Concatenates a linear transformation matrix and a scaling */
-  // TODO returns an expression
-  friend inline LinearMatrixType operator* (const LinearMatrixType& other, const Scaling& s)
-  { return other * s.coeffs().asDiagonal(); }
-
  template<typename Derived>
-  inline LinearMatrixType operator*(const RotationBase<Derived,Dim>& r) const
-  { return *this * r.toRotationMatrix(); }
+  inline typename ei_plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const
+  { return other * m_factor; }

-  /** Applies scaling to vector */
-  inline VectorType operator* (const VectorType& other) const
-  { return coeffs().asDiagonal() * other; }
+  template<typename Derived,int Dim>
+  inline Matrix<Scalar,Dim,Dim> operator*(const RotationBase<Derived,Dim>& r) const
+  { return r.toRotationMatrix() * m_factor; }

  /** \returns the inverse scaling */
-  inline Scaling inverse() const
-  { return Scaling(coeffs.cwise().inverse()); }
+  inline UniformScaling inverse() const
+  { return UniformScaling(Scalar(1)/m_factor); }

-  inline Scaling& operator=(const Scaling& other)
-  {
-    m_coeffs = other.m_coeffs;
-    return *this;
-  }
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that this function always returns a new UniformScaling object by value,
+    * even when \a NewScalarType is equal to the current scalar type of \c *this.
+    */
+  template<typename NewScalarType>
+  inline UniformScaling<NewScalarType> cast() const
+  { return UniformScaling<NewScalarType>(NewScalarType(m_factor)); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit UniformScaling(const UniformScaling<OtherScalarType>& other)
+  { m_factor = Scalar(other.factor()); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const UniformScaling& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return ei_isApprox(m_factor, other.factor(), prec); }

 };

-/** \addtogroup GeometryModule */
+/** Concatenates a linear transformation matrix and a uniform scaling */
+// NOTE this operator is defined in MatrixBase and not as a friend function
+// of UniformScaling to fix an internal crash of Intel's ICC
+template<typename Derived> const typename MatrixBase<Derived>::ScalarMultipleReturnType
+MatrixBase<Derived>::operator*(const UniformScaling<Scalar>& s) const
+{ return derived() * s.factor(); }
+
+/** Constructs a uniform scaling from scale factor \a s */
+static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
+/** Constructs a uniform scaling from scale factor \a s */
+static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
+/** Constructs a uniform scaling from scale factor \a s */
+template<typename RealScalar>
+static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
+{ return UniformScaling<std::complex<RealScalar> >(s); }
+
+/** Constructs a 2D axis aligned scaling */
+template<typename Scalar>
+static inline DiagonalMatrix<Scalar,2> Scaling(Scalar sx, Scalar sy)
+{ return DiagonalMatrix<Scalar,2>(sx, sy); }
+/** Constructs a 3D axis aligned scaling */
+template<typename Scalar>
+static inline DiagonalMatrix<Scalar,3> Scaling(Scalar sx, Scalar sy, Scalar sz)
+{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
+
+/** Constructs an axis aligned scaling expression from vector expression \a coeffs
+  * This is an alias for coeffs.asDiagonal()
+  */
+template<typename Derived>
+static inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
+{ return coeffs.asDiagonal(); }
+
+/** \addtogroup Geometry_Module */
 //@{
-typedef Scaling<float, 2> Scaling2f;
-typedef Scaling<double,2> Scaling2d;
-typedef Scaling<float, 3> Scaling3f;
-typedef Scaling<double,3> Scaling3d;
+/** \deprecated */
+typedef DiagonalMatrix<float, 2> AlignedScaling2f;
+/** \deprecated */
+typedef DiagonalMatrix<double,2> AlignedScaling2d;
+/** \deprecated */
+typedef DiagonalMatrix<float, 3> AlignedScaling3f;
+/** \deprecated */
+typedef DiagonalMatrix<double,3> AlignedScaling3d;
 //@}

-template<typename Scalar, int Dim>
-inline typename Scaling<Scalar,Dim>::TransformType
-Scaling<Scalar,Dim>::operator* (const TranslationType& t) const
-{
-  TransformType res;
-  res.matrix().setZero();
-  res.linear().diagonal() = coeffs();
-  res.translation() = m_coeffs.cwise() * t.vector();
-  res(Dim,Dim) = Scalar(1);
-  return res;
-}
-
-template<typename Scalar, int Dim>
-inline typename Scaling<Scalar,Dim>::TransformType
-Scaling<Scalar,Dim>::operator* (const TransformType& t) const
-{
-  TransformType res = t;
-  res.prescale(m_coeffs);
-  return res;
-}
-
 #endif // EIGEN_SCALING_H
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -25,7 +25,7 @@
 #ifndef EIGEN_TRANSLATION_H
 #define EIGEN_TRANSLATION_H

-/** \geometry_module \ingroup GeometryModule
+/** \geometry_module \ingroup Geometry_Module
  *
  * \class Translation
  *
@@ -35,17 +35,15 @@
  * \param _Dim the  dimension of the space, can be a compile time value or Dynamic
  *
  * \note This class is not aimed to be used to store a translation transformation,
-  * but rather to make easier the constructions and updates of Transformation object.
+  * but rather to make the construction and update of Transform objects easier.
  *
  * \sa class Scaling, class Transform
  */
 template<typename _Scalar, int _Dim>
 class Translation
-  #ifdef EIGEN_VECTORIZE
-  : public ei_with_aligned_operator_new<_Scalar,_Dim>
-  #endif
 {
 public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim)
  /** dimension of the space */
  enum { Dim = _Dim };
  /** the scalar type of the coefficients */
@@ -54,10 +52,8 @@ public:
  typedef Matrix<Scalar,Dim,1> VectorType;
  /** corresponding linear transformation matrix type */
  typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
-  /** corresponding scaling transformation type */
-  typedef Scaling<Scalar,Dim> ScalingType;
  /** corresponding affine transformation type */
-  typedef Transform<Scalar,Dim> TransformType;
+  typedef Transform<Scalar,Dim> AffineTransformType;

 protected:

@@ -82,7 +78,7 @@ public:
    m_coeffs.y() = sy;
    m_coeffs.z() = sz;
  }
-  /** Constructs and initialize the scaling transformation from a vector of scaling coefficients */
+  /** Constructs and initializes the translation transformation from a vector of translation coefficients */
  explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}

  const VectorType& vector() const { return m_coeffs; }
@@ -91,32 +87,41 @@ public:
  /** Concatenates two translation */
  inline Translation operator* (const Translation& other) const
  { return Translation(m_coeffs + other.m_coeffs); }
-  
-  /** Concatenates a translation and a scaling */
-  inline TransformType operator* (const ScalingType& other) const;
+
+  /** Concatenates a translation and a uniform scaling */
+  inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;

  /** Concatenates a translation and a linear transformation */
-  inline TransformType operator* (const LinearMatrixType& linear) const;
+  template<typename OtherDerived>
+  inline AffineTransformType operator* (const MatrixBase<OtherDerived>& linear) const;

+  /** Concatenates a translation and a rotation */
  template<typename Derived>
-  inline TransformType operator*(const RotationBase<Derived,Dim>& r) const
+  inline AffineTransformType operator*(const RotationBase<Derived,Dim>& r) const
  { return *this * r.toRotationMatrix(); }

-  /** Concatenates a linear transformation and a translation */
+  /** \returns the concatenation of a linear transformation \a l with the translation \a t */
  // its a nightmare to define a templated friend function outside its declaration
-  friend inline TransformType operator* (const LinearMatrixType& linear, const Translation& t)
+  template<typename OtherDerived> friend
+  inline AffineTransformType operator*(const MatrixBase<OtherDerived>& linear, const Translation& t)
  {
-    TransformType res;
+    AffineTransformType res;
    res.matrix().setZero();
-    res.linear() = linear;
-    res.translation() = linear * t.m_coeffs;
+    res.linear() = linear.derived();
+    res.translation() = linear.derived() * t.m_coeffs;
    res.matrix().row(Dim).setZero();
    res(Dim,Dim) = Scalar(1);
    return res;
  }

  /** Concatenates a translation and an affine transformation */
-  inline TransformType operator* (const TransformType& t) const;
+  template<int Mode>
+  inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode>& t) const
+  {
+    Transform<Scalar,Dim,Mode> res = t;
+    res.pretranslate(m_coeffs);
+    return res;
+  }

  /** Applies translation to vector */
  inline VectorType operator* (const VectorType& other) const
@@ -131,9 +136,30 @@ public:
    return *this;
  }

+  /** \returns \c *this with scalar type cast to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
+  { return typename ei_cast_return_type<Translation,Translation<NewScalarType,Dim> >::type(*this); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
+  { m_coeffs = other.vector().template cast<Scalar>(); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const Translation& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
+  { return m_coeffs.isApprox(other.m_coeffs, prec); }
+
 };

-/** \addtogroup GeometryModule */
+/** \addtogroup Geometry_Module */
 //@{
 typedef Translation<float, 2> Translation2f;
 typedef Translation<double,2> Translation2d;
@@ -141,39 +167,30 @@ typedef Translation<float, 3> Translation3f;
 typedef Translation<double,3> Translation3d;
 //@}

-
 template<typename Scalar, int Dim>
-inline typename Translation<Scalar,Dim>::TransformType
-Translation<Scalar,Dim>::operator* (const ScalingType& other) const
+inline typename Translation<Scalar,Dim>::AffineTransformType
+Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
 {
-  TransformType res;
+  AffineTransformType res;
  res.matrix().setZero();
-  res.linear().diagonal() = other.coeffs();
+  res.linear().diagonal().fill(other.factor());
  res.translation() = m_coeffs;
  res(Dim,Dim) = Scalar(1);
  return res;
 }

 template<typename Scalar, int Dim>
-inline typename Translation<Scalar,Dim>::TransformType
-Translation<Scalar,Dim>::operator* (const LinearMatrixType& linear) const
+template<typename OtherDerived>
+inline typename Translation<Scalar,Dim>::AffineTransformType
+Translation<Scalar,Dim>::operator* (const MatrixBase<OtherDerived>& linear) const
 {
-  TransformType res;
+  AffineTransformType res;
  res.matrix().setZero();
-  res.linear() = linear;
+  res.linear() = linear.derived();
  res.translation() = m_coeffs;
  res.matrix().row(Dim).setZero();
  res(Dim,Dim) = Scalar(1);
  return res;
 }

-template<typename Scalar, int Dim>
-inline typename Translation<Scalar,Dim>::TransformType
-Translation<Scalar,Dim>::operator* (const TransformType& t) const
-{
-  TransformType res = t;
-  res.pretranslate(m_coeffs);
-  return res;
-}
-
 #endif // EIGEN_TRANSLATION_H
--- a/Eigen/src/Geometry/arch/CMakeLists.txt
+++ b/Eigen/src/Geometry/arch/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Collect every *.h file of this directory and install it with the Devel component.
+file(GLOB Eigen_Geometry_arch_SRCS "*.h")
+
+install(FILES ${Eigen_Geometry_arch_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry/arch
+  COMPONENT Devel)
--- a/Show More
+++ b/Show More