bump

work around brain dead ICC
std:: namespace fixup for more restrictive compilers such as QNX's QCC
2026-04-10 11:34:33 +08:00 · 2010-02-11 21:39:41 -05:00 · 2010-02-11 19:32:56 -05:00 · 2010-02-10 22:27:35 +01:00 · 2010-02-03 21:55:01 +01:00 · 2010-02-02 07:06:15 -05:00
34 changed files with 558 additions and 250 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ set(INCLUDE_INSTALL_DIR
    "The directory where we install the header files"
    FORCE)

-set(EIGEN_VERSION_NUMBER "2.0.10")
+set(EIGEN_VERSION_NUMBER "2.0.12")
 set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}")

 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -26,7 +26,7 @@
  #define EIGEN_SSE2_BUT_NOT_OLD_GCC
 #endif

-#ifndef EIGEN_DONT_VECTORIZE
+#if !defined(EIGEN_DONT_VECTORIZE) && !defined(EIGEN_DONT_ALIGN)
  #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_SSE
--- a/Eigen/src/Core/DiagonalCoeffs.h
+++ b/Eigen/src/Core/DiagonalCoeffs.h
@@ -47,11 +47,11 @@ struct ei_traits<DiagonalCoeffs<MatrixType> >
  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    RowsAtCompileTime = int(MatrixType::SizeAtCompileTime) == Dynamic ? Dynamic
-                      : EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime,
+                      : EIGEN_SIZE_MIN(MatrixType::RowsAtCompileTime,
                                       MatrixType::ColsAtCompileTime),
    ColsAtCompileTime = 1,
    MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
-                            : EIGEN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime,
+                            : EIGEN_SIZE_MIN(MatrixType::MaxRowsAtCompileTime,
                                             MatrixType::MaxColsAtCompileTime),
    MaxColsAtCompileTime = 1,
    Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit),
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -99,7 +99,7 @@ template<typename Derived> class MapBase
    inline const Scalar coeff(int index) const
    {
      ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
-      if ( ((RowsAtCompileTime == 1) == IsRowMajor) )
+      if ( ((RowsAtCompileTime == 1) == IsRowMajor) || !int(Derived::IsVectorAtCompileTime) )
        return m_data[index];
      else
        return m_data[index*stride()];
@@ -108,7 +108,7 @@ template<typename Derived> class MapBase
    inline Scalar& coeffRef(int index)
    {
      ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
-      if ( ((RowsAtCompileTime == 1) == IsRowMajor) )
+      if ( ((RowsAtCompileTime == 1) == IsRowMajor)  || !int(Derived::IsVectorAtCompileTime) )
        return const_cast<Scalar*>(m_data)[index];
      else
        return const_cast<Scalar*>(m_data)[index*stride()];
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -54,7 +54,7 @@ template<> inline int machine_epsilon<int>() { return 0; }
 inline int ei_real(int x)  { return x; }
 inline int ei_imag(int)    { return 0; }
 inline int ei_conj(int x)  { return x; }
-inline int ei_abs(int x)   { return abs(x); }
+inline int ei_abs(int x)   { return std::abs(x); }
 inline int ei_abs2(int x)  { return x*x; }
 inline int ei_sqrt(int)  { ei_assert(false); return 0; }
 inline int ei_exp(int)  { ei_assert(false); return 0; }
@@ -67,7 +67,7 @@ inline int ei_pow(int x, int y) { return int(std::pow(double(x), y)); }
 template<> inline int ei_random(int a, int b)
 {
  // We can't just do rand()%n as only the high-order bits are really random
-  return a + static_cast<int>((b-a+1) * (rand() / (RAND_MAX + 1.0)));
+  return a + static_cast<int>((b-a+1) * (std::rand() / (RAND_MAX + 1.0)));
 }
 template<> inline int ei_random()
 {
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -592,7 +592,8 @@ template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int
 template<typename OtherDerived>
 inline void Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::swap(const MatrixBase<OtherDerived>& other)
 {
-  ei_matrix_swap_impl<Matrix, OtherDerived>::run(*this, *const_cast<MatrixBase<OtherDerived>*>(&other));
+  // the Eigen:: here is to work around a stupid ICC 11.1 bug.
+  Eigen::ei_matrix_swap_impl<Matrix, OtherDerived>::run(*this, *const_cast<MatrixBase<OtherDerived>*>(&other));
 }


--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -583,7 +583,8 @@ template<typename Derived> class MatrixBase

    const LU<PlainMatrixType> lu() const;
    const PlainMatrixType inverse() const;
-    void computeInverse(PlainMatrixType *result) const;
+    template<typename ResultType>
+    void computeInverse(MatrixBase<ResultType> *result) const;
    Scalar determinant() const;

 /////////// Cholesky module ///////////
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@@ -40,7 +40,7 @@ template <typename T, int Size, int MatrixOptions,
  ei_matrix_array()
  {
    #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
-    ei_assert((reinterpret_cast<size_t>(array) & 0xf) == 0
+    ei_assert((reinterpret_cast<std::size_t>(array) & 0xf) == 0
              && "this assertion is explained here: http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html  **** READ THIS WEB PAGE !!! ****");
    #endif
  }
--- a/Eigen/src/Core/Part.h
+++ b/Eigen/src/Core/Part.h
@@ -50,7 +50,7 @@ struct ei_traits<Part<MatrixType, Mode> > : ei_traits<MatrixType>
  typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
    Flags = (_MatrixTypeNested::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+    CoeffReadCost = _MatrixTypeNested::CoeffReadCost + ConditionalJumpCost
  };
 };

--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -66,11 +66,8 @@ struct ProductReturnType
 template<typename Lhs, typename Rhs>
 struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
 {
-  typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
-
-  typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime,
-                             typename ei_plain_matrix_type_column_major<Rhs>::type
-                   >::type RhsNested;
+  typedef const Lhs& LhsNested;
+  typedef const Rhs& RhsNested;

  typedef Product<LhsNested, RhsNested, CacheFriendlyProduct> Type;
 };
@@ -128,7 +125,7 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >

    RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
    ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
-    InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
+    InnerSize = EIGEN_SIZE_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),

    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
@@ -144,7 +141,7 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >

    EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)),

-    RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit),
+    RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)|DirectAccessBit),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
          | EvalBeforeAssigningBit
@@ -571,7 +568,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
    else
    {
      _res = ei_aligned_stack_new(Scalar,res.size());
-      Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
+      Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1,ColMajor> >(_res, res.size()) = res;
    }
    ei_cache_friendly_product_colmajor_times_vector(res.size(),
      &product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
@@ -579,7 +576,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect

    if (!EvalToRes)
    {
-      res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
+      res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1,ColMajor> >(_res, res.size());
      ei_aligned_stack_delete(Scalar, _res, res.size());
    }
  }
@@ -617,7 +614,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
    else
    {
      _res = ei_aligned_stack_new(Scalar, res.size());
-      Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res;
+      Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1,ColMajor> >(_res, res.size()) = res;
    }
    ei_cache_friendly_product_colmajor_times_vector(res.size(),
      &product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
@@ -625,7 +622,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo

    if (!EvalToRes)
    {
-      res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
+      res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1,ColMajor> >(_res, res.size());
      ei_aligned_stack_delete(Scalar, _res, res.size());
    }
  }
@@ -650,7 +647,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
    else
    {
      _rhs = ei_aligned_stack_new(Scalar, product.rhs().size());
-      Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs();
+      Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1,ColMajor> >(_rhs, product.rhs().size()) = product.rhs();
    }
    ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
                                                    _rhs, product.rhs().size(), res);
@@ -678,7 +675,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
    else
    {
      _lhs = ei_aligned_stack_new(Scalar, product.lhs().size());
-      Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs();
+      Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1,ColMajor> >(_lhs, product.lhs().size()) = product.lhs();
    }
    ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
                                                    _lhs, product.lhs().size(), res);
@@ -709,7 +706,17 @@ MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProdu
  if (other._expression()._useCacheFriendlyProduct())
    ei_cache_friendly_product_selector<Product<Lhs,Rhs,CacheFriendlyProduct> >::run(const_cast_derived(), other._expression());
  else
-    lazyAssign(derived() + other._expression());
+  {
+    typedef typename ei_cleantype<Lhs>::type _Lhs;
+    typedef typename ei_cleantype<Rhs>::type _Rhs;
+  
+    typedef typename ei_nested<_Lhs,_Rhs::ColsAtCompileTime>::type LhsNested;
+    typedef typename ei_nested<_Rhs,_Lhs::RowsAtCompileTime>::type RhsNested;
+
+    Product<LhsNested,RhsNested,NormalProduct> prod(other._expression().lhs(),other._expression().rhs());
+    
+    lazyAssign(derived() + prod);
+  }
  return derived();
 }

@@ -724,12 +731,21 @@ inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFrien
  }
  else
  {
-    lazyAssign<Product<Lhs,Rhs,CacheFriendlyProduct> >(product);
+    typedef typename ei_cleantype<Lhs>::type _Lhs;
+    typedef typename ei_cleantype<Rhs>::type _Rhs;
+
+    typedef typename ei_nested<_Lhs,_Rhs::ColsAtCompileTime>::type LhsNested;
+    typedef typename ei_nested<_Rhs,_Lhs::RowsAtCompileTime>::type RhsNested;
+
+    typedef Product<LhsNested,RhsNested,NormalProduct> NormalProduct;
+    NormalProduct normal_prod(product.lhs(),product.rhs());
+
+    lazyAssign<NormalProduct>(normal_prod);
  }
  return derived();
 }

-template<typename T> struct ei_product_copy_rhs
+template<typename T,int StorageOrder> struct ei_product_copy_rhs
 {
  typedef typename ei_meta_if<
         (ei_traits<T>::Flags & RowMajorBit)
@@ -739,11 +755,30 @@ template<typename T> struct ei_product_copy_rhs
    >::ret type;
 };

-template<typename T> struct ei_product_copy_lhs
+template<typename T> struct ei_product_copy_rhs<T,RowMajorBit>
+{
+  typedef typename ei_meta_if<
+      (!(ei_traits<T>::Flags & DirectAccessBit)),
+      typename ei_plain_matrix_type<T>::type,
+      const T&
+    >::ret type;
+};
+
+template<typename T,int StorageOrder> struct ei_product_copy_lhs
 {
  typedef typename ei_meta_if<
      (!(int(ei_traits<T>::Flags) & DirectAccessBit)),
-      typename ei_plain_matrix_type<T>::type,
+      typename ei_plain_matrix_type_row_major<T>::type,
+      const T&
+    >::ret type;
+};
+
+template<typename T> struct ei_product_copy_lhs<T,RowMajorBit>
+{
+  typedef typename ei_meta_if<
+         ((ei_traits<T>::Flags & RowMajorBit)==0)
+      || (!(int(ei_traits<T>::Flags) & DirectAccessBit)),
+      typename ei_plain_matrix_type_row_major<T>::type,
      const T&
    >::ret type;
 };
@@ -752,9 +787,9 @@ template<typename Lhs, typename Rhs, int ProductMode>
 template<typename DestDerived>
 inline void Product<Lhs,Rhs,ProductMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
 {
-  typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy;
+  typedef typename ei_product_copy_lhs<_LhsNested,DestDerived::Flags&RowMajorBit>::type LhsCopy;
  typedef typename ei_unref<LhsCopy>::type _LhsCopy;
-  typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy;
+  typedef typename ei_product_copy_rhs<_RhsNested,DestDerived::Flags&RowMajorBit>::type RhsCopy;
  typedef typename ei_unref<RhsCopy>::type _RhsCopy;
  LhsCopy lhs(m_lhs);
  RhsCopy rhs(m_rhs);
@@ -764,6 +799,7 @@ inline void Product<Lhs,Rhs,ProductMode>::_cacheFriendlyEvalAndAdd(DestDerived&
    _RhsCopy::Flags&RowMajorBit, (const Scalar*)&(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(),
    DestDerived::Flags&RowMajorBit, (Scalar*)&(res.coeffRef(0,0)), res.stride()
  );
+
 }

 #endif // EIGEN_PRODUCT_H
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -30,7 +30,7 @@

 #define EIGEN_WORLD_VERSION 2
 #define EIGEN_MAJOR_VERSION 0
-#define EIGEN_MINOR_VERSION 10
+#define EIGEN_MINOR_VERSION 12

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -257,6 +257,9 @@ enum { RowsAtCompileTime = Eigen::ei_traits<Derived>::RowsAtCompileTime, \
 _EIGEN_GENERIC_PUBLIC_INTERFACE(Derived, Eigen::MatrixBase<Derived>)

 #define EIGEN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_SIZE_MIN(a,b) (((int)a == 1 || (int)b == 1) ? 1 \
+                           : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+                           : ((int)a <= (int)b) ? (int)a : (int)b)
 #define EIGEN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)

 // just an empty macro !
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -59,10 +59,10 @@
  * Fast, but wastes 16 additional bytes of memory.
  * Does not throw any exception.
  */
-inline void* ei_handmade_aligned_malloc(size_t size)
+inline void* ei_handmade_aligned_malloc(std::size_t size)
 {
-  void *original = malloc(size+16);
-  void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+  void *original = std::malloc(size+16);
+  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
 }
@@ -71,13 +71,13 @@ inline void* ei_handmade_aligned_malloc(size_t size)
 inline void ei_handmade_aligned_free(void *ptr)
 {
  if(ptr)
-    free(*(reinterpret_cast<void**>(ptr) - 1));
+    std::free(*(reinterpret_cast<void**>(ptr) - 1));
 }

 /** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
  */
-inline void* ei_aligned_malloc(size_t size)
+inline void* ei_aligned_malloc(std::size_t size)
 {
  #ifdef EIGEN_NO_MALLOC
    ei_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
@@ -108,18 +108,18 @@ inline void* ei_aligned_malloc(size_t size)
 /** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
  */
-template<bool Align> inline void* ei_conditional_aligned_malloc(size_t size)
+template<bool Align> inline void* ei_conditional_aligned_malloc(std::size_t size)
 {
  return ei_aligned_malloc(size);
 }

-template<> inline void* ei_conditional_aligned_malloc<false>(size_t size)
+template<> inline void* ei_conditional_aligned_malloc<false>(std::size_t size)
 {
  #ifdef EIGEN_NO_MALLOC
    ei_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
  #endif

-  void *result = malloc(size);
+  void *result = std::malloc(size);
  #ifdef EIGEN_EXCEPTIONS
    if(!result) throw std::bad_alloc();
  #endif
@@ -129,9 +129,9 @@ template<> inline void* ei_conditional_aligned_malloc<false>(size_t size)
 /** \internal construct the elements of an array.
  * The \a size parameter tells on how many objects to call the constructor of T.
  */
-template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t size)
+template<typename T> inline T* ei_construct_elements_of_array(T *ptr, std::size_t size)
 {
-  for (size_t i=0; i < size; ++i) ::new (ptr + i) T;
+  for (std::size_t i=0; i < size; ++i) ::new (ptr + i) T;
  return ptr;
 }

@@ -139,13 +139,13 @@ template<typename T> inline T* ei_construct_elements_of_array(T *ptr, size_t siz
  * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
  * The default constructor of T is called.
  */
-template<typename T> inline T* ei_aligned_new(size_t size)
+template<typename T> inline T* ei_aligned_new(std::size_t size)
 {
  T *result = reinterpret_cast<T*>(ei_aligned_malloc(sizeof(T)*size));
  return ei_construct_elements_of_array(result, size);
 }

-template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t size)
+template<typename T, bool Align> inline T* ei_conditional_aligned_new(std::size_t size)
 {
  T *result = reinterpret_cast<T*>(ei_conditional_aligned_malloc<Align>(sizeof(T)*size));
  return ei_construct_elements_of_array(result, size);
@@ -179,13 +179,13 @@ template<bool Align> inline void ei_conditional_aligned_free(void *ptr)

 template<> inline void ei_conditional_aligned_free<false>(void *ptr)
 {
-  free(ptr);
+  std::free(ptr);
 }

 /** \internal destruct the elements of an array.
  * The \a size parameters tells on how many objects to call the destructor of T.
  */
-template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t size)
+template<typename T> inline void ei_destruct_elements_of_array(T *ptr, std::size_t size)
 {
  // always destruct an array starting from the end.
  while(size) ptr[--size].~T();
@@ -194,7 +194,7 @@ template<typename T> inline void ei_destruct_elements_of_array(T *ptr, size_t si
 /** \internal delete objects constructed with ei_aligned_new
  * The \a size parameters tells on how many objects to call the destructor of T.
  */
-template<typename T> inline void ei_aligned_delete(T *ptr, size_t size)
+template<typename T> inline void ei_aligned_delete(T *ptr, std::size_t size)
 {
  ei_destruct_elements_of_array<T>(ptr, size);
  ei_aligned_free(ptr);
@@ -203,24 +203,53 @@ template<typename T> inline void ei_aligned_delete(T *ptr, size_t size)
 /** \internal delete objects constructed with ei_conditional_aligned_new
  * The \a size parameters tells on how many objects to call the destructor of T.
  */
-template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *ptr, size_t size)
+template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *ptr, std::size_t size)
 {
  ei_destruct_elements_of_array<T>(ptr, size);
  ei_conditional_aligned_free<Align>(ptr);
 }

-/** \internal \returns the number of elements which have to be skipped such that data are 16 bytes aligned */
-template<typename Scalar>
-inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
+/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
+  *
+  * \param array the address of the start of the array
+  * \param size the size of the array
+  *
+  * \note If no element of the array is well aligned, the size of the array is returned. Typically,
+  * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
+  * packet size for the given scalar type is 1, then everything is considered well-aligned.
+  *
+  * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
+  * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
+  * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
+  * example with Scalar=double on certain 32-bit platforms, see bug #79.
+  *
+  * There is also the variant ei_first_aligned(const MatrixBase&, Integer) defined in Coeffs.h.
+  */
+template<typename Scalar, typename Integer>
+inline static Integer ei_alignmentOffset(const Scalar* array, Integer size)
 {
  typedef typename ei_packet_traits<Scalar>::type Packet;
-  const int PacketSize = ei_packet_traits<Scalar>::size;
-  const int PacketAlignedMask = PacketSize-1;
-  const bool Vectorized = PacketSize>1;
-  return Vectorized
-          ? std::min<int>( (PacketSize - (int((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask))
-                           & PacketAlignedMask, maxOffset)
-          : 0;
+  enum { PacketSize = ei_packet_traits<Scalar>::size,
+         PacketAlignedMask = PacketSize-1
+  };
+  
+  if(PacketSize==1)
+  {
+    // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
+    // of the array have the same alignment.
+    return 0;
+  }
+  else if(size_t(array) & (sizeof(Scalar)-1))
+  {
+    // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
+    // Consequently, no element of the array is well aligned.
+    return size;
+  }
+  else
+  {
+    return std::min<Integer>( (PacketSize - (Integer((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
+                           & PacketAlignedMask, size);
+  }
 }

 /** \internal
@@ -252,23 +281,23 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
 #if EIGEN_ALIGN
  #ifdef EIGEN_EXCEPTIONS
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
-      void* operator new(size_t size, const std::nothrow_t&) throw() { \
+      void* operator new(std::size_t size, const std::nothrow_t&) throw() { \
        try { return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); } \
        catch (...) { return 0; } \
        return 0; \
      }
  #else
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
-      void* operator new(size_t size, const std::nothrow_t&) throw() { \
+      void* operator new(std::size_t size, const std::nothrow_t&) throw() { \
        return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
      }
  #endif

  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
-      void *operator new(size_t size) { \
+      void *operator new(std::size_t size) { \
        return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
-      void *operator new[](size_t size) { \
+      void *operator new[](std::size_t size) { \
        return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void operator delete(void * ptr) throw() { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
@@ -276,7 +305,7 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
-      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
+      static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
      void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
@@ -310,8 +339,8 @@ template<class T>
 class aligned_allocator
 {
 public:
-    typedef size_t    size_type;
-    typedef ptrdiff_t difference_type;
+    typedef std::size_t    size_type;
+    typedef std::ptrdiff_t difference_type;
    typedef T*        pointer;
    typedef const T*  const_pointer;
    typedef T&        reference;
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -161,6 +161,19 @@ template<typename T> struct ei_plain_matrix_type_column_major
          > type;
 };

+/* ei_plain_matrix_type_row_major : same as ei_plain_matrix_type but guaranteed to be row-major
+ */
+template<typename T> struct ei_plain_matrix_type_row_major
+{
+  typedef Matrix<typename ei_traits<T>::Scalar,
+                ei_traits<T>::RowsAtCompileTime,
+                ei_traits<T>::ColsAtCompileTime,
+                AutoAlign | RowMajor,
+                ei_traits<T>::MaxRowsAtCompileTime,
+                ei_traits<T>::MaxColsAtCompileTime
+          > type;
+};
+
 template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
 template<typename T> struct ei_must_nest_by_value<NestByValue<T> > { enum { ret = true }; };

--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -52,9 +52,9 @@ public:
  typedef _Scalar Scalar;
  typedef typename NumTraits<Scalar>::Real RealScalar;
  typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
-  typedef Matrix<Scalar,AmbientDimAtCompileTime==Dynamic
+  typedef Matrix<Scalar,int(AmbientDimAtCompileTime)==Dynamic
                        ? Dynamic
-                        : AmbientDimAtCompileTime+1,1> Coefficients;
+                        : int(AmbientDimAtCompileTime)+1,1> Coefficients;
  typedef Block<Coefficients,AmbientDimAtCompileTime,1> NormalReturnType;

  /** Default constructor without initialization */
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -450,22 +450,31 @@ inline Scalar Quaternion<Scalar>::angularDistance(const Quaternion& other) const
 template <typename Scalar>
 Quaternion<Scalar> Quaternion<Scalar>::slerp(Scalar t, const Quaternion& other) const
 {
-  static const Scalar one = Scalar(1) - precision<Scalar>();
+  static const Scalar one = Scalar(1) - machine_epsilon<Scalar>();
  Scalar d = this->dot(other);
  Scalar absD = ei_abs(d);
+
+  Scalar scale0;
+  Scalar scale1;
+
  if (absD>=one)
-    return *this;
+  {
+    scale0 = Scalar(1) - t;
+    scale1 = t;
+  }
+  else
+  {
+    // theta is the angle between the 2 quaternions
+    Scalar theta = std::acos(absD);
+    Scalar sinTheta = ei_sin(theta);

-  // theta is the angle between the 2 quaternions
-  Scalar theta = std::acos(absD);
-  Scalar sinTheta = ei_sin(theta);
+    scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta;
+    scale1 = ei_sin( ( t * theta) ) / sinTheta;
+    if (d<0)
+      scale1 = -scale1;
+  }

-  Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta;
-  Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta;
-  if (d<0)
-    scale1 = -scale1;
-
-  return Quaternion(scale0 * m_coeffs + scale1 * other.m_coeffs);
+  return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs());
 }

 // set from a rotation matrix
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -29,8 +29,8 @@
 *** Part 1 : optimized implementations for fixed-size 2,3,4 cases ***
 ********************************************************************/

-template<typename MatrixType>
-void ei_compute_inverse_in_size2_case(const MatrixType& matrix, MatrixType* result)
+template<typename XprType, typename MatrixType>
+void ei_compute_inverse_in_size2_case(const XprType& matrix, MatrixType* result)
 {
  typedef typename MatrixType::Scalar Scalar;
  const Scalar invdet = Scalar(1) / matrix.determinant();
@@ -54,10 +54,10 @@ bool ei_compute_inverse_in_size2_case_with_check(const XprType& matrix, MatrixTy
  return true;
 }

-template<typename MatrixType>
-void ei_compute_inverse_in_size3_case(const MatrixType& matrix, MatrixType* result)
+template<typename Derived, typename OtherDerived>
+void ei_compute_inverse_in_size3_case(const Derived& matrix, OtherDerived* result)
 {
-  typedef typename MatrixType::Scalar Scalar;
+  typedef typename Derived::Scalar Scalar;
  const Scalar det_minor00 = matrix.minor(0,0).determinant();
  const Scalar det_minor10 = matrix.minor(1,0).determinant();
  const Scalar det_minor20 = matrix.minor(2,0).determinant();
@@ -75,148 +75,204 @@ void ei_compute_inverse_in_size3_case(const MatrixType& matrix, MatrixType* resu
  result->coeffRef(2, 2) = matrix.minor(2,2).determinant() * invdet;
 }

-template<typename MatrixType>
-bool ei_compute_inverse_in_size4_case_helper(const MatrixType& matrix, MatrixType* result)
+template<typename Derived, typename OtherDerived, typename Scalar = typename Derived::Scalar> // Scalar is a separate parameter so the struct can be specialized per scalar type
+struct ei_compute_inverse_in_size4_case // generic 4x4 inverse computed from cofactors
 {
-  /* Let's split M into four 2x2 blocks:
-    * (P Q)
-    * (R S)
-    * If P is invertible, with inverse denoted by P_inverse, and if
-    * (S - R*P_inverse*Q) is also invertible, then the inverse of M is
-    * (P' Q')
-    * (R' S')
-    * where
-    * S' = (S - R*P_inverse*Q)^(-1)
-    * P' = P1 + (P1*Q) * S' *(R*P_inverse)
-    * Q' = -(P_inverse*Q) * S'
-    * R' = -S' * (R*P_inverse)
-    */
-  typedef Block<MatrixType,2,2> XprBlock22;
-  typedef typename MatrixBase<XprBlock22>::PlainMatrixType Block22;
-  Block22 P_inverse;
-  if(ei_compute_inverse_in_size2_case_with_check(matrix.template block<2,2>(0,0), &P_inverse))
+  static void run(const Derived& matrix, OtherDerived& result) // writes the inverse of matrix into result; no invertibility check is made
   {
-    const Block22 Q = matrix.template block<2,2>(0,2);
-    const Block22 P_inverse_times_Q = P_inverse * Q;
-    const XprBlock22 R = matrix.template block<2,2>(2,0);
-    const Block22 R_times_P_inverse = R * P_inverse;
-    const Block22 R_times_P_inverse_times_Q = R_times_P_inverse * Q;
-    const XprBlock22 S = matrix.template block<2,2>(2,2);
-    const Block22 X = S - R_times_P_inverse_times_Q;
-    Block22 Y;
-    ei_compute_inverse_in_size2_case(X, &Y);
-    result->template block<2,2>(2,2) = Y;
-    result->template block<2,2>(2,0) = - Y * R_times_P_inverse;
-    const Block22 Z = P_inverse_times_Q * Y;
-    result->template block<2,2>(0,2) = - Z;
-    result->template block<2,2>(0,0) = P_inverse + Z * R_times_P_inverse;
-    return true;
+    result.coeffRef(0,0) = matrix.minor(0,0).determinant(); // pattern below: result(j,i) = (-1)^(i+j) * det(minor(i,j)), i.e. the transposed cofactor matrix
+    result.coeffRef(1,0) = -matrix.minor(0,1).determinant();
+    result.coeffRef(2,0) = matrix.minor(0,2).determinant();
+    result.coeffRef(3,0) = -matrix.minor(0,3).determinant();
+    result.coeffRef(0,2) = matrix.minor(2,0).determinant();
+    result.coeffRef(1,2) = -matrix.minor(2,1).determinant();
+    result.coeffRef(2,2) = matrix.minor(2,2).determinant();
+    result.coeffRef(3,2) = -matrix.minor(2,3).determinant();
+    result.coeffRef(0,1) = -matrix.minor(1,0).determinant();
+    result.coeffRef(1,1) = matrix.minor(1,1).determinant();
+    result.coeffRef(2,1) = -matrix.minor(1,2).determinant();
+    result.coeffRef(3,1) = matrix.minor(1,3).determinant();
+    result.coeffRef(0,3) = -matrix.minor(3,0).determinant();
+    result.coeffRef(1,3) = matrix.minor(3,1).determinant();
+    result.coeffRef(2,3) = -matrix.minor(3,2).determinant();
+    result.coeffRef(3,3) = matrix.minor(3,3).determinant();
+    result /= (matrix.col(0).cwise()*result.row(0).transpose()).sum(); // divisor is det(matrix): column 0 of matrix dotted with its cofactors, which sit in row 0 of result
   }
-  else
-  {
-    return false;
-  }
-}
+};

-template<typename MatrixType>
-void ei_compute_inverse_in_size4_case(const MatrixType& _matrix, MatrixType* result)
+#ifdef EIGEN_VECTORIZE_SSE
+// The SSE code for the 4x4 float matrix inverse in this file comes from the file
+//   ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
+// its copyright information is:
+//   Copyright (C) 1999 Intel Corporation
+// See page ii of that document for legal stuff. Not being lawyers, we just assume
+// here that if Intel makes this document publicly available, with source code
+// and detailed explanations, it's because they want their CPUs to be fed with
+// good code, and therefore they presumably don't mind us using it in Eigen.
+template<typename Derived, typename OtherDerived>
+struct ei_compute_inverse_in_size4_case<Derived, OtherDerived, float>
 {
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar RealScalar;
+  static void run(const Derived& matrix, OtherDerived& result)
+  {
+    // Variables (Streaming SIMD Extensions registers) which will contain cofactors and, later, the
+    // lines of the inverted matrix.
+    __m128 minor0, minor1, minor2, minor3;

-  // we will do row permutations on the matrix. This copy should have negligible cost.
-  // if not, consider working in-place on the matrix (const-cast it, but then undo the permutations
-  // to nevertheless honor constness)
-  typename MatrixType::PlainMatrixType matrix(_matrix);
+    // Variables which will contain the lines of the reference matrix and, later (after the transposition),
+    // the columns of the original matrix.
+    __m128 row0, row1, row2, row3;

-  // let's extract from the 2 first colums a 2x2 block whose determinant is as big as possible.
-  int good_row0, good_row1, good_i;
-  Matrix<RealScalar,6,1> absdet;
+    // Temporary variables and the variable that will contain the matrix determinant.
+    __m128 det, tmp1;

-  // any 2x2 block with determinant above this threshold will be considered good enough.
-  // The magic value 1e-1 here comes from experimentation. The bigger it is, the higher the precision,
-  // the slower the computation. This value 1e-1 gives precision almost as good as the brutal cofactors
-  // algorithm, both in average and in worst-case precision.
-  RealScalar d = (matrix.col(0).squaredNorm()+matrix.col(1).squaredNorm()) * RealScalar(1e-1);
-  #define ei_inv_size4_helper_macro(i,row0,row1) \
-  absdet[i] = ei_abs(matrix.coeff(row0,0)*matrix.coeff(row1,1) \
-                                - matrix.coeff(row0,1)*matrix.coeff(row1,0)); \
-  if(absdet[i] > d) { good_row0=row0; good_row1=row1; goto good; }
-  ei_inv_size4_helper_macro(0,0,1)
-  ei_inv_size4_helper_macro(1,0,2)
-  ei_inv_size4_helper_macro(2,0,3)
-  ei_inv_size4_helper_macro(3,1,2)
-  ei_inv_size4_helper_macro(4,1,3)
-  ei_inv_size4_helper_macro(5,2,3)
+    // Matrix transposition
+    const float *src = matrix.data();
+    tmp1  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)src)), (__m64*)(src+ 4));
+    row1  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+8))), (__m64*)(src+12));
+    row0  = _mm_shuffle_ps(tmp1, row1, 0x88);
+    row1  = _mm_shuffle_ps(row1, tmp1, 0xDD);
+    tmp1  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+ 2))), (__m64*)(src+ 6));
+    row3  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+10))), (__m64*)(src+14));
+    row2  = _mm_shuffle_ps(tmp1, row3, 0x88);
+    row3  = _mm_shuffle_ps(row3, tmp1, 0xDD);

-  // no 2x2 block has determinant bigger than the threshold. So just take the one that
-  // has the biggest determinant
-  absdet.maxCoeff(&good_i);
-  good_row0 = good_i <= 2 ? 0 : good_i <= 4 ? 1 : 2;
-  good_row1 = good_i <= 2 ? good_i+1 : good_i <= 4 ? good_i-1 : 3;

-  // now good_row0 and good_row1 are correctly set
-  good:
+    // Cofactors calculation. Because in the process of cofactor computation some pairs in three-
+    // element products are repeated, it is not reasonable to load these pairs anew every time. The
+    // values in the registers with these pairs are formed using shuffle instruction. Cofactors are
+    // calculated row by row (4 elements are placed in 1 SP FP SIMD floating point register).

-  // do row permutations to move this 2x2 block to the top
-  matrix.row(0).swap(matrix.row(good_row0));
-  matrix.row(1).swap(matrix.row(good_row1));
-  // now applying our helper function is numerically stable
-  ei_compute_inverse_in_size4_case_helper(matrix, result);
-  // Since we did row permutations on the original matrix, we need to do column permutations
-  // in the reverse order on the inverse
-  result->col(1).swap(result->col(good_row1));
-  result->col(0).swap(result->col(good_row0));
-}
+    tmp1   = _mm_mul_ps(row2, row3);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
+    minor0  = _mm_mul_ps(row1, tmp1);
+    minor1  = _mm_mul_ps(row0, tmp1);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
+    minor0  = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
+    minor1  = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
+    minor1  = _mm_shuffle_ps(minor1, minor1, 0x4E);
+    //    -----------------------------------------------
+    tmp1   = _mm_mul_ps(row1, row2);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
+    minor0  = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
+    minor3  = _mm_mul_ps(row0, tmp1);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
+    minor0  = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
+    minor3  = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
+    minor3  = _mm_shuffle_ps(minor3, minor3, 0x4E);
+    //    -----------------------------------------------
+    tmp1   = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
+    row2   = _mm_shuffle_ps(row2, row2, 0x4E);
+    minor0  = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
+    minor2  = _mm_mul_ps(row0, tmp1);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
+    minor0  = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
+    minor2  = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
+    minor2  = _mm_shuffle_ps(minor2, minor2, 0x4E);
+    //    -----------------------------------------------
+    tmp1   = _mm_mul_ps(row0, row1);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
+    minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
+    minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
+    minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
+    minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
+    //           -----------------------------------------------
+    tmp1   = _mm_mul_ps(row0, row3);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
+    minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
+    minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
+    minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
+    minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
+    //           -----------------------------------------------
+    tmp1   = _mm_mul_ps(row0, row2);
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
+    minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
+    minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
+    tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
+    minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
+    minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
+
+    // Evaluation of determinant and its reciprocal value. In the original Intel document,
+    // 1/det was evaluated using a fast rcpps command with subsequent approximation using
+    // the Newton-Raphson algorithm. Here, we go for an IEEE-compliant division instead,
+    // so as to not compromise precision at all.
+    det    = _mm_mul_ps(row0, minor0);
+    det    = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det);
+    det    = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det);
+//     tmp1= _mm_rcp_ss(det);
+//     det= _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
+    det    = _mm_div_ss(_mm_set_ss(1.0f), det); // <--- yay, one original line not copied from Intel
+    det    = _mm_shuffle_ps(det, det, 0x00);
+    // warning, Intel's variable naming is very confusing: now 'det' is 1/det !
+
+    // Multiplication of cofactors by 1/det. Storing the inverse matrix to the address in pointer dst.
+    minor0 = _mm_mul_ps(det, minor0);
+    float *dst = result.data();
+    _mm_storel_pi((__m64*)(dst), minor0);
+    _mm_storeh_pi((__m64*)(dst+2), minor0);
+    minor1 = _mm_mul_ps(det, minor1);
+    _mm_storel_pi((__m64*)(dst+4), minor1);
+    _mm_storeh_pi((__m64*)(dst+6), minor1);
+    minor2 = _mm_mul_ps(det, minor2);
+    _mm_storel_pi((__m64*)(dst+ 8), minor2);
+    _mm_storeh_pi((__m64*)(dst+10), minor2);
+    minor3 = _mm_mul_ps(det, minor3);
+    _mm_storel_pi((__m64*)(dst+12), minor3);
+    _mm_storeh_pi((__m64*)(dst+14), minor3);
+  }
+};
+#endif

 /***********************************************
 *** Part 2 : selector and MatrixBase methods ***
 ***********************************************/

-template<typename MatrixType, int Size = MatrixType::RowsAtCompileTime>
+template<typename Derived, typename OtherDerived, int Size = Derived::RowsAtCompileTime>
 struct ei_compute_inverse
 {
-  static inline void run(const MatrixType& matrix, MatrixType* result)
+  static inline void run(const Derived& matrix, OtherDerived* result)
  {
-    LU<MatrixType> lu(matrix);
+    LU<Derived> lu(matrix);
    lu.computeInverse(result);
  }
 };

-template<typename MatrixType>
-struct ei_compute_inverse<MatrixType, 1>
+template<typename Derived, typename OtherDerived>
+struct ei_compute_inverse<Derived, OtherDerived, 1>
 {
-  static inline void run(const MatrixType& matrix, MatrixType* result)
+  static inline void run(const Derived& matrix, OtherDerived* result)
  {
-    typedef typename MatrixType::Scalar Scalar;
+    typedef typename Derived::Scalar Scalar;
    result->coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0);
  }
 };

-template<typename MatrixType>
-struct ei_compute_inverse<MatrixType, 2>
+template<typename Derived, typename OtherDerived>
+struct ei_compute_inverse<Derived, OtherDerived, 2>
 {
-  static inline void run(const MatrixType& matrix, MatrixType* result)
+  static inline void run(const Derived& matrix, OtherDerived* result)
  {
    ei_compute_inverse_in_size2_case(matrix, result);
  }
 };

-template<typename MatrixType>
-struct ei_compute_inverse<MatrixType, 3>
+template<typename Derived, typename OtherDerived>
+struct ei_compute_inverse<Derived, OtherDerived, 3>
 {
-  static inline void run(const MatrixType& matrix, MatrixType* result)
+  static inline void run(const Derived& matrix, OtherDerived* result)
  {
    ei_compute_inverse_in_size3_case(matrix, result);
  }
 };

-template<typename MatrixType>
-struct ei_compute_inverse<MatrixType, 4>
+template<typename Derived, typename OtherDerived>
+struct ei_compute_inverse<Derived, OtherDerived, 4>
 {
-  static inline void run(const MatrixType& matrix, MatrixType* result)
+  static inline void run(const Derived& matrix, OtherDerived* result)
  {
-    ei_compute_inverse_in_size4_case(matrix, result);
+    ei_compute_inverse_in_size4_case<Derived, OtherDerived>::run(matrix, *result);
  }
 };

@@ -234,11 +290,12 @@ struct ei_compute_inverse<MatrixType, 4>
  * \sa inverse()
  */
 template<typename Derived>
-inline void MatrixBase<Derived>::computeInverse(PlainMatrixType *result) const
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::computeInverse(MatrixBase<OtherDerived> *result) const
 {
  ei_assert(rows() == cols());
  EIGEN_STATIC_ASSERT(NumTraits<Scalar>::HasFloatingPoint,NUMERIC_TYPE_MUST_BE_FLOATING_POINT)
-  ei_compute_inverse<PlainMatrixType>::run(eval(), result);
+  ei_compute_inverse<PlainMatrixType, OtherDerived>::run(eval(), static_cast<OtherDerived*>(result));
 }

 /** \lu_module
--- a/Eigen/src/LU/LU.h
+++ b/Eigen/src/LU/LU.h
@@ -68,7 +68,7 @@ template<typename MatrixType> class LU
    typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
    typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> ColVectorType;

-    enum { MaxSmallDimAtCompileTime = EIGEN_ENUM_MIN(
+    enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN(
             MatrixType::MaxColsAtCompileTime,
             MatrixType::MaxRowsAtCompileTime)
    };
@@ -297,7 +297,8 @@ template<typename MatrixType> class LU
      *
      * \sa MatrixBase::computeInverse(), inverse()
      */
-    inline void computeInverse(MatrixType *result) const
+    template<typename ResultType>
+    inline void computeInverse(ResultType *result) const
    {
      solve(MatrixType::Identity(m_lu.rows(), m_lu.cols()), result);
    }
@@ -508,7 +509,7 @@ bool LU<MatrixType>::solve(
  if(!isSurjective())
  {
    // is c is in the image of U ?
-    RealScalar biggest_in_c = m_rank>0 ? c.corner(TopLeft, m_rank, c.cols()).cwise().abs().maxCoeff() : 0;
+    RealScalar biggest_in_c = m_rank>0 ? c.corner(TopLeft, m_rank, c.cols()).cwise().abs().maxCoeff() : RealScalar(0);
    for(int col = 0; col < c.cols(); ++col)
      for(int row = m_rank; row < c.rows(); ++row)
        if(!ei_isMuchSmallerThan(c.coeff(row,col), biggest_in_c, m_precision))
--- a/Eigen/src/QR/Tridiagonalization.h
+++ b/Eigen/src/QR/Tridiagonalization.h
@@ -293,7 +293,7 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType&
      {
        int starti = i+1;
        int alignedEnd = starti;
-        if (PacketSize>1)
+        if (PacketSize>1 && (int(MatrixType::Flags)&RowMajor) == 0)
        {
          int alignedStart = (starti) + ei_alignmentOffset(&matA.coeffRef(starti,j1), n-starti);
          alignedEnd = alignedStart + ((n-alignedStart)/PacketSize)*PacketSize;
--- a/Eigen/src/SVD/SVD.h
+++ b/Eigen/src/SVD/SVD.h
@@ -49,7 +49,7 @@ template<typename MatrixType> class SVD
    enum {
      PacketSize = ei_packet_traits<Scalar>::size,
      AlignmentMask = int(PacketSize)-1,
-      MinSize = EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime)
+      MinSize = EIGEN_SIZE_MIN(MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime)
    };

    typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> ColVector;
--- a/Eigen/src/Sparse/SparseProduct.h
+++ b/Eigen/src/Sparse/SparseProduct.h
@@ -97,7 +97,7 @@ struct ei_traits<SparseProduct<LhsNested, RhsNested, ProductMode> >

    RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
    ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
-    InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
+    InnerSize = EIGEN_SIZE_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),

    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
--- a/Eigen/src/Sparse/TriangularSolver.h
+++ b/Eigen/src/Sparse/TriangularSolver.h
@@ -43,8 +43,11 @@ struct ei_solve_triangular_selector<Lhs,Rhs,LowerTriangular,RowMajor|IsSparse>
        {
          lastVal = it.value();
          lastIndex = it.index();
+          if(lastIndex == i)
+            break;
          tmp -= lastVal * other.coeff(lastIndex,col);
        }
+
        if (Lhs::Flags & UnitDiagBit)
          other.coeffRef(i,col) = tmp;
        else
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -1,8 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
+// for linear algebra.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -26,8 +26,15 @@
 #ifndef EIGEN_BENCH_TIMER_H
 #define EIGEN_BENCH_TIMER_H

-#include <sys/time.h>
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#else
+#include <time.h>
 #include <unistd.h>
+#endif
+
 #include <cstdlib>
 #include <numeric>

@@ -35,12 +42,25 @@ namespace Eigen
 {

 /** Elapsed time timer keeping the best try.
+  *
+  * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
+  * On Windows we use QueryPerformanceCounter
+  *
+  * Important: on linux, you must link with -lrt
  */
 class BenchTimer
 {
 public:

-  BenchTimer() { reset(); }
+  BenchTimer() 
+  { 
+#if defined(_WIN32) || defined(__CYGWIN__)
+    LARGE_INTEGER freq;
+    QueryPerformanceFrequency(&freq);
+    m_frequency = (double)freq.QuadPart;
+#endif
+    reset(); 
+  }

  ~BenchTimer() {}

@@ -51,23 +71,34 @@ public:
    m_best = std::min(m_best, getTime() - m_start);
  }

-  /** Return the best elapsed time.
+  /** Return the best elapsed time in seconds.
    */
  inline double value(void)
  {
-      return m_best;
+    return m_best;
  }

+#if defined(_WIN32) || defined(__CYGWIN__)
+  inline double getTime(void) // non-static on this branch because the body reads m_frequency
+#else
  static inline double getTime(void)
+#endif
  {
-      struct timeval tv;
-      struct timezone tz;
-      gettimeofday(&tv, &tz);
-      return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
+#if defined(_WIN32) || defined(__CYGWIN__) // same guard as the windows.h include, the constructor and m_frequency
+    LARGE_INTEGER query_ticks;
+    QueryPerformanceCounter(&query_ticks);
+    return query_ticks.QuadPart/m_frequency; // ticks divided by ticks-per-second gives seconds
+#else
+    timespec ts;
+    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); // whole seconds plus the nanosecond part
+#endif
  }

 protected:
-
+#if defined(_WIN32) || defined(__CYGWIN__)
+  double m_frequency;
+#endif
  double m_best, m_start;

 };
--- a/doc/CustomizingEigen.dox
+++ b/doc/CustomizingEigen.dox
@@ -57,10 +57,10 @@ void makeFloor(const MatrixBase<OtherDerived>& other) { derived() = derived().cw
 template<typename OtherDerived>
 void makeCeil(const MatrixBase<OtherDerived>& other) { derived() = derived().cwise().max(other.derived()); }

-const typename Cwise<Derived>::ScalarAddReturnType
-operator+(const Scalar& scalar) const { return cwise() + scalar }
+const CwiseUnaryOp<ei_scalar_add_op<Scalar>, Derived>
+operator+(const Scalar& scalar) const { return cwise() + scalar; }

-friend const typename Cwise<Derived>::ScalarAddReturnType
+friend const CwiseUnaryOp<ei_scalar_add_op<Scalar>, Derived>
 operator+(const Scalar& scalar, const MatrixBase<Derived>& mat) { return mat + scalar; }
 \endcode

--- a/doc/StlContainers.dox
+++ b/doc/StlContainers.dox
@@ -11,7 +11,7 @@ namespace Eigen {

 \section summary Executive summary

-Using STL containers on \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types" requires taking the following two steps:
+Using STL containers on \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types", or classes having members of such types, requires taking the following two steps:

 \li A 16-byte-aligned allocator must be used. Eigen does provide one ready for use: aligned_allocator.
 \li If you want to use the std::vector container, you need to \#include <Eigen/StdVector>.
--- a/doc/UnalignedArrayAssert.dox
+++ b/doc/UnalignedArrayAssert.dox
@@ -55,16 +55,17 @@ Note that here, Eigen::Vector2d is only used as an example, more generally the i

 \section c2 Cause 2: STL Containers

-If you use STL Containers such as std::vector, std::map, ..., with Eigen objects, like this,
+If you use STL Containers such as std::vector, std::map, ..., with Eigen objects, or with classes containing Eigen objects, like this,

 \code
 std::vector<Eigen::Matrix2f> my_vector;
-std::map<int, Eigen::Matrix2f> my_map;
+struct my_class { ... Eigen::Matrix2f m; ... };
+std::map<int, my_class> my_map;
 \endcode

 then you need to read this separate page: \ref StlContainers "Using STL Containers with Eigen".

-Note that here, Eigen::Matrix2f is only used as an example, more generally the issue arises for all \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types".
+Note that here, Eigen::Matrix2f is only used as an example, more generally the issue arises for all \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types" and \ref StructHavingEigenMembers "structures having such Eigen objects as members".

 \section c3 Cause 3: Passing Eigen objects by value

--- a/scripts/eigen_gen_docs
+++ b/scripts/eigen_gen_docs
@@ -1,8 +1,5 @@
 #!/bin/sh

-# TODO : actually exit on exit, currently it only exit from the ()
-# TODO : display error msg on stderr instead of stdout
-
 # configuration
 # You should call this script with USER set as you want, else some default
 # will be used
@@ -11,16 +8,12 @@ USER=${USER:-'orzel'}
 # step 1 : build
 # todo if 'build is not there, create one:
 #mkdir build
-(cd build && cmake .. && make -j3 doc) || (echo "make failed"; exit 1)
+(cd build && cmake .. && make -j3 doc) || { echo "make failed"; exit 1; }
 #todo: n+1 where n = number of cpus

 #step 2 : upload
-BRANCH=`hg branch`
-if [ $BRANCH == "default" ]
-then
-    BRANCH='devel'
-fi
 # (the '/' at the end of path are very important, see rsync documentation)
-rsync -az build/doc/html/ $USER@ssh.tuxfamily.org:eigen/eigen.tuxfamily.org-web/htdocs/dox-$BRANCH/ ||  (echo "upload failed"; exit 1)
+rsync -az build/doc/html/ $USER@ssh.tuxfamily.org:eigen/eigen.tuxfamily.org-web/htdocs/dox-2.0/ || { echo "upload failed"; exit 1; }

+echo "Uploaded successfully"

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,7 @@
+option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF)
+if(EIGEN_DEFAULT_TO_ROW_MAJOR)
+  add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
+endif()

 find_package(GSL)
 if(GSL_FOUND AND GSL_VERSION_MINOR LESS 9)
@@ -93,12 +97,20 @@ else(CMAKE_COMPILER_IS_GNUCXX)
 endif(CMAKE_COMPILER_IS_GNUCXX)

 option(EIGEN_NO_ASSERTION_CHECKING "Disable checking of assertions" OFF)
+if(EIGEN_NO_ASSERTION_CHECKING)
+  add_definitions("-DEIGEN_NO_ASSERTION_CHECKING=1")
+endif()
+

 # similar to set_target_properties but append the property instead of overwriting it
 macro(ei_add_target_property target prop value)

  get_target_property(previous ${target} ${prop})
-  set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}")
+  if(previous MATCHES "NOTFOUND")
+    set_target_properties(${target} PROPERTIES ${prop} "${value}")
+  else()
+    set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}")
+  endif()

 endmacro(ei_add_target_property)

@@ -134,13 +146,9 @@ macro(ei_add_test testname)

    option(EIGEN_DEBUG_ASSERTS "Enable debuging of assertions" OFF)
    if(EIGEN_DEBUG_ASSERTS)
-      set_target_properties(${targetname} PROPERTIES COMPILE_DEFINITIONS "-DEIGEN_DEBUG_ASSERTS=1")
+      set_target_properties(${targetname} PROPERTIES COMPILE_DEFINITIONS "EIGEN_DEBUG_ASSERTS=1")
    endif(EIGEN_DEBUG_ASSERTS)

-  else(NOT EIGEN_NO_ASSERTION_CHECKING)
-
-    set_target_properties(${targetname} PROPERTIES COMPILE_DEFINITIONS "-DEIGEN_NO_ASSERTION_CHECKING=1")
-
  endif(NOT EIGEN_NO_ASSERTION_CHECKING)

  if(${ARGC} GREATER 1)
@@ -180,6 +188,7 @@ ei_add_test(meta)
 ei_add_test(sizeof)
 ei_add_test(dynalloc)
 ei_add_test(nomalloc)
+ei_add_test(first_aligned)
 ei_add_test(mixingtypes)
 ei_add_test(packetmath)
 ei_add_test(unalignedassert)
@@ -200,7 +209,7 @@ ei_add_test(array)
 ei_add_test(triangular)
 ei_add_test(cholesky " " "${GSL_LIBRARIES}")
 ei_add_test(lu ${EI_OFLAG})
-ei_add_test(determinant)
+ei_add_test(determinant ${EI_OFLAG})
 ei_add_test(inverse)
 ei_add_test(qr)
 ei_add_test(eigensolver " " "${GSL_LIBRARIES}")
@@ -215,10 +224,12 @@ ei_add_test(newstdvector)
 if(QT4_FOUND)
  ei_add_test(qtvector " " "${QT_QTCORE_LIBRARY}")
 endif(QT4_FOUND)
-ei_add_test(sparse_vector)
-ei_add_test(sparse_basic)
-ei_add_test(sparse_solvers " " "${SPARSE_LIBS}")
-ei_add_test(sparse_product)
+if(NOT EIGEN_DEFAULT_TO_ROW_MAJOR)
+  ei_add_test(sparse_vector)
+  ei_add_test(sparse_basic)
+  ei_add_test(sparse_solvers " " "${SPARSE_LIBS}")
+  ei_add_test(sparse_product)
+endif()
 ei_add_test(swap)
 ei_add_test(visitor)

@@ -267,6 +278,12 @@ else(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
  message("Explicit vec:      AUTO")
 endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)

+if(EIGEN_DEFAULT_TO_ROW_MAJOR)
+  message("Default order:     Row-major")
+else()
+  message("Default order:     Column-major")
+endif()
+
  message("CXX:               ${CMAKE_CXX_COMPILER}")
 if(CMAKE_COMPILER_IS_GNUCXX)
  execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version COMMAND head -n 1 OUTPUT_VARIABLE EIGEN_CXX_VERSION_STRING OUTPUT_STRIP_TRAILING_WHITESPACE)
--- a/test/first_aligned.cpp
+++ b/test/first_aligned.cpp
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "main.h"
+
+template<typename Scalar>
+void test_first_aligned_helper(Scalar *array, int size) // verifies that the offset returned by ei_alignmentOffset lands on a packet boundary
+{
+  const int packet_size = sizeof(Scalar) * ei_packet_traits<Scalar>::size; // packet width in bytes
+  VERIFY(((size_t(array) + sizeof(Scalar) * ei_alignmentOffset(array, size)) % packet_size) == 0); // address reached after skipping that many scalars must be a multiple of the packet width
+}
+
+template<typename Scalar>
+void test_none_aligned_helper(Scalar *array, int size) // for buffers where no element is packet-aligned: expects ei_alignmentOffset to return size
+{
+  VERIFY(ei_packet_traits<Scalar>::size == 1 || ei_alignmentOffset(array, size) == size); // the first clause lets the check pass when a packet holds a single scalar
+}
+
+struct some_non_vectorizable_type { float x; }; // user-defined scalar; NOTE(review): presumably ei_packet_traits falls back to size 1 for it -- confirm
+
+void test_first_aligned() // exercises ei_alignmentOffset on float, double, misaligned double and a user-defined type
+{
+  EIGEN_ALIGN_128 float array_float[100]; // NOTE(review): EIGEN_ALIGN_128 presumably requests 128-bit alignment -- confirm against its definition
+  test_first_aligned_helper(array_float, 50); // start positions 0..5 floats past the array base
+  test_first_aligned_helper(array_float+1, 50);
+  test_first_aligned_helper(array_float+2, 50);
+  test_first_aligned_helper(array_float+3, 50);
+  test_first_aligned_helper(array_float+4, 50);
+  test_first_aligned_helper(array_float+5, 50);
+  
+  EIGEN_ALIGN_128 double array_double[100];
+  test_first_aligned_helper(array_double, 50); // start positions 0..2 doubles past the array base
+  test_first_aligned_helper(array_double+1, 50);
+  test_first_aligned_helper(array_double+2, 50);
+  
+  double *array_double_plus_4_bytes = (double*)(size_t(array_double)+4); // base address shifted by 4 bytes via integer arithmetic
+  test_none_aligned_helper(array_double_plus_4_bytes, 50);
+  test_none_aligned_helper(array_double_plus_4_bytes+1, 50);
+  
+  some_non_vectorizable_type array_nonvec[100];
+  test_first_aligned_helper(array_nonvec, 100); // both helpers are run on the same buffer for this type
+  test_none_aligned_helper(array_nonvec, 100);
+}
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -43,11 +43,9 @@ template<typename MatrixType> void inverse(const MatrixType& m)
             mzero = MatrixType::Zero(rows, cols),
             identity = MatrixType::Identity(rows, rows);

-  if (ei_is_same_type<RealScalar,float>::ret)
+  while(ei_abs(m1.determinant()) < RealScalar(0.1) && rows <= 8)
  {
-    // let's build a more stable to inverse matrix
-    MatrixType a = MatrixType::Random(rows,cols);
-    m1 += m1 * m1.adjoint() + a * a.adjoint();
+    m1 = MatrixType::Random(rows, cols);
  }

  m2 = m1.inverse();
--- a/test/map.cpp
+++ b/test/map.cpp
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -24,7 +24,7 @@

 #include "main.h"

-template<typename VectorType> void map_class(const VectorType& m)
+template<typename VectorType> void map_class_vector(const VectorType& m)
 {
  typedef typename VectorType::Scalar Scalar;

@@ -50,6 +50,34 @@ template<typename VectorType> void map_class(const VectorType& m)
  delete[] array3;
 }

+template<typename MatrixType> void map_class_matrix(const MatrixType& m)
+{
+  typedef typename MatrixType::Scalar Scalar;
+
+  int rows = m.rows(), cols = m.cols(), size = rows*cols;
+
+  // test Map.h
+  Scalar* array1 = ei_aligned_new<Scalar>(size);
+  for(int i = 0; i < size; i++) array1[i] = Scalar(1);
+  Scalar* array2 = ei_aligned_new<Scalar>(size);
+  for(int i = 0; i < size; i++) array2[i] = Scalar(1);
+  Scalar* array3 = new Scalar[size+1];
+  for(int i = 0; i < size+1; i++) array3[i] = Scalar(1);
+  Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3;
+  Map<MatrixType, Aligned>(array1, rows, cols) = MatrixType::Ones(rows,cols);
+  Map<MatrixType>(array2, rows, cols) = Map<MatrixType>(array1, rows, cols);
+  Map<MatrixType>(array3unaligned, rows, cols) = Map<MatrixType>(array1, rows, cols);
+  MatrixType ma1 = Map<MatrixType>(array1, rows, cols);
+  MatrixType ma2 = Map<MatrixType, Aligned>(array2, rows, cols);
+  VERIFY_IS_APPROX(ma1, ma2);
+  MatrixType ma3 = Map<MatrixType>(array3unaligned, rows, cols);
+  VERIFY_IS_APPROX(ma1, ma3);
+  
+  ei_aligned_delete(array1, size);
+  ei_aligned_delete(array2, size);
+  delete[] array3;
+}
+
 template<typename VectorType> void map_static_methods(const VectorType& m)
 {
  typedef typename VectorType::Scalar Scalar;
@@ -80,11 +108,17 @@ template<typename VectorType> void map_static_methods(const VectorType& m)
 void test_map()
 {
  for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST( map_class(Matrix<float, 1, 1>()) );
-    CALL_SUBTEST( map_class(Vector4d()) );
-    CALL_SUBTEST( map_class(RowVector4f()) );
-    CALL_SUBTEST( map_class(VectorXcf(8)) );
-    CALL_SUBTEST( map_class(VectorXi(12)) );
+    CALL_SUBTEST( map_class_vector(Matrix<float, 1, 1>()) );
+    CALL_SUBTEST( map_class_vector(Vector4d()) );
+    CALL_SUBTEST( map_class_vector(RowVector4f()) );
+    CALL_SUBTEST( map_class_vector(VectorXcf(8)) );
+    CALL_SUBTEST( map_class_vector(VectorXi(12)) );
+
+    CALL_SUBTEST( map_class_matrix(Matrix<float, 1, 1>()) );
+    CALL_SUBTEST( map_class_matrix(Matrix4d()) );
+    CALL_SUBTEST( map_class_matrix(Matrix<float,3,5>()) );
+    CALL_SUBTEST( map_class_matrix(MatrixXcf(ei_random<int>(1,10),ei_random<int>(1,10))) );
+    CALL_SUBTEST( map_class_matrix(MatrixXi(ei_random<int>(1,10),ei_random<int>(1,10))) );

    CALL_SUBTEST( map_static_methods(Matrix<double, 1, 1>()) );
    CALL_SUBTEST( map_static_methods(Vector3f()) );
--- a/test/prec_inverse_4x4.cpp
+++ b/test/prec_inverse_4x4.cpp
@@ -45,7 +45,6 @@ template<typename MatrixType> void inverse_permutation_4x4()
 {
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;
-  double error_max = 0.;
  Vector4i indices(0,1,2,3);
  for(int i = 0; i < 24; ++i)
  {
@@ -56,12 +55,9 @@ template<typename MatrixType> void inverse_permutation_4x4()
    m(indices(3),3) = 1;
    MatrixType inv = m.inverse();
    double error = double( (m*inv-MatrixType::Identity()).norm() / epsilon<Scalar>() );
-    error_max = std::max(error_max, error);
+    VERIFY(error == 0.0);
    std::next_permutation(indices.data(),indices.data()+4);
  }
-  std::cerr << "inverse_permutation_4x4, Scalar = " << type_name<Scalar>() << std::endl;
-  EIGEN_DEBUG_VAR(error_max);
-  VERIFY(error_max < 1. );
 }

 template<typename MatrixType> void inverse_general_4x4(int repeat)
@@ -86,8 +82,8 @@ template<typename MatrixType> void inverse_general_4x4(int repeat)
  double error_avg = error_sum / repeat;
  EIGEN_DEBUG_VAR(error_avg);
  EIGEN_DEBUG_VAR(error_max);
-  VERIFY(error_avg < (NumTraits<Scalar>::IsComplex ? 8.4 : 1.4) );
-  VERIFY(error_max < (NumTraits<Scalar>::IsComplex ? 160.0 : 75.) );
+  VERIFY(error_avg < (NumTraits<Scalar>::IsComplex ? 8.0 : 1.0));
+  VERIFY(error_max < (NumTraits<Scalar>::IsComplex ? 64.0 : 20.0));
 }

 void test_prec_inverse_4x4()
--- a/test/product.h
+++ b/test/product.h
@@ -46,7 +46,7 @@ template<typename MatrixType> void product(const MatrixType& m)
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
  typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime,
-                         MatrixType::Flags&RowMajorBit> OtherMajorMatrixType;
+                         MatrixType::Options^RowMajor> OtherMajorMatrixType;

  int rows = m.rows();
  int cols = m.cols();
@@ -77,6 +77,7 @@ template<typename MatrixType> void product(const MatrixType& m)

  // begin testing Product.h: only associativity for now
  // (we use Transpose.h but this doesn't count as a test for it)
+
  VERIFY_IS_APPROX((m1*m1.transpose())*m2,  m1*(m1.transpose()*m2));
  m3 = m1;
  m3 *= m1.transpose() * m2;
@@ -137,6 +138,7 @@ template<typename MatrixType> void product(const MatrixType& m)
  res2 = square2;
  res2 += (m1.transpose() * m2).lazy();
  VERIFY_IS_APPROX(res2, square2 + m1.transpose() * m2);
+
  if (NumTraits<Scalar>::HasFloatingPoint && std::min(rows,cols)>1)
  {
    VERIFY(areNotApprox(res2,square2 + m2.transpose() * m1));
--- a/test/sparse_solvers.cpp
+++ b/test/sparse_solvers.cpp
@@ -68,12 +68,20 @@ template<typename Scalar> void sparse_solvers(int rows, int cols)
    VERIFY_IS_APPROX(refMat2.template marked<LowerTriangular>().solveTriangular(vec2),
                     m2.template marked<LowerTriangular>().solveTriangular(vec3));

+    // lower - transpose
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords);
+    VERIFY_IS_APPROX(refMat2.template marked<LowerTriangular>().transpose().solveTriangular(vec2),
+                     m2.template marked<LowerTriangular>().transpose().solveTriangular(vec3));
+
    // upper
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords);
    VERIFY_IS_APPROX(refMat2.template marked<UpperTriangular>().solveTriangular(vec2),
                     m2.template marked<UpperTriangular>().solveTriangular(vec3));

-    // TODO test row major
+    // upper - transpose
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords);
+    VERIFY_IS_APPROX(refMat2.template marked<UpperTriangular>().transpose().solveTriangular(vec2),
+                     m2.template marked<UpperTriangular>().transpose().solveTriangular(vec3));
  }

  // test LLT
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -44,12 +44,21 @@ void test_vectorization_logic()

 #ifdef EIGEN_VECTORIZE

+#ifdef  EIGEN_DEFAULT_TO_ROW_MAJOR
+  VERIFY(test_assign(Vector4f(),Vector4f(),
+    LinearVectorization,CompleteUnrolling));
+  VERIFY(test_assign(Vector4f(),Vector4f()+Vector4f(),
+    LinearVectorization,CompleteUnrolling));
+  VERIFY(test_assign(Vector4f(),Vector4f().cwise() * Vector4f(),
+    LinearVectorization,CompleteUnrolling));
+#else
  VERIFY(test_assign(Vector4f(),Vector4f(),
    InnerVectorization,CompleteUnrolling));
  VERIFY(test_assign(Vector4f(),Vector4f()+Vector4f(),
    InnerVectorization,CompleteUnrolling));
  VERIFY(test_assign(Vector4f(),Vector4f().cwise() * Vector4f(),
    InnerVectorization,CompleteUnrolling));
+#endif

  VERIFY(test_assign(Matrix4f(),Matrix4f(),
    InnerVectorization,CompleteUnrolling));
@@ -92,8 +101,10 @@ void test_vectorization_logic()
  VERIFY(test_sum(Matrix<float,16,16>().block<4,4>(1,2),
    NoVectorization,CompleteUnrolling));

+#ifndef EIGEN_DEFAULT_TO_ROW_MAJOR
  VERIFY(test_sum(Matrix<float,16,16>().block<8,1>(1,2),
    LinearVectorization,CompleteUnrolling));
+#endif

  VERIFY(test_sum(Matrix<double,7,3>(),
    NoVectorization,CompleteUnrolling));
Author	SHA1	Message	Date
Benoit Jacob	ed6eb5a625	bump	2010-02-11 21:39:41 -05:00
Benoit Jacob	9488a12125	work around brain dead ICC	2010-02-11 19:32:56 -05:00
Piotr Trojanek	7b44957c4b	std:: namespace fixup for more restrictive compilers such as QNX's QCC	2010-02-10 22:27:35 +01:00
Hauke Heibel	743ad75595	BenchTimer backport (clock_gettime & QueryPerformanceCounter).	2010-02-03 21:55:01 +01:00
Benoit Jacob	a9eabed421	Patch by 'Wolf' from the issue tracker: Fix bug #90, missing type cast in LU, allow to use LU with MPFR.	2010-02-02 07:06:15 -05:00
Benoit Jacob	cd34a1d351	backport bug fix by Jitse.	2010-01-28 14:00:09 -05:00
Benoit Jacob	3e963ee69d	EIGEN_ENUM_MIN ---> EIGEN_SIZE_MIN	2010-01-26 20:37:57 -05:00
Benoit Jacob	6cc9dc17f2	In LU / Inverse, decouple the output type from the input type. This has long been done in the default branch	2010-01-26 18:45:23 -05:00
Gael Guennebaud	7852a48a2f	fix matrix product with EIGEN_DEFAULT_TO_ROW_MAJOR	2010-01-25 21:56:01 +01:00
Benoit Jacob	d209120180	* Introduce EIGEN_DEFAULT_TO_ROW_MAJOR tests option ---> Now only product_large fails with EIGEN_DEFAULT_TO_ROW_MAJOR. * Fix EIGEN_NO_ASSERTION_CHECKING tests option * Fix a crash in Tridiagonalization on row-major matrices + SSE * Fix inverse test (numeric stability noise) * Extend map test (see previous fixes in MapBase) * Fix vectorization_logic test for row-major * Disable sparse tests with EIGEN_DEFAULT_TO_ROW_MAJOR	2010-01-25 14:00:02 -05:00
Thomas Capricelli	55c0707b1d	fix the script again (definitely?) + cleaning	2010-01-22 19:28:33 +01:00
Benoit Jacob	72044ca925	fix a super nasty bug: on row-major expressions that are NOT vectors but that do have LinearAccess, the MapBase::coeff(int) and MapBase::coeffRef(int) methods were broken.	2010-01-21 23:33:20 -05:00
Benoit Jacob	c2b8ca7493	if EIGEN_DONT_ALIGN then don't try to vectorize (was giving a #error later on).	2010-01-21 22:32:16 -05:00
Gael Guennebaud	018cb8975a	fix plugin doc	2010-01-17 19:55:08 +01:00
Benoit Jacob	3ab280ce4e	add missing semicolon in the example	2010-01-17 12:40:19 -05:00
Benoit Jacob	b40030753b	Added tag 2.0.11 for changeset `5f73a8df20`	2010-01-10 11:30:40 -05:00
Benoit Jacob	5f73a8df20	bump	2010-01-10 11:30:10 -05:00
Thomas Capricelli	8a6d5f10dc	backport from tip : actually stop on compile failure	2010-01-06 17:17:40 +01:00
Benoit Jacob	ba6ed5fa5f	Fix CoeffReadCost in Part: it must account for the cost of the conditional jump. This makes Part considered an "expensive" xpr that must be evaluated in operations such as Product. This fixes bug #80.	2010-01-02 13:04:04 -05:00
Benoit Jacob	e4c88c14ec	clarify docs as requested on the forum	2010-01-02 12:54:55 -05:00
Benoit Jacob	74207a31fa	backport the fix to bug #79 , and the unit test	2010-01-02 12:45:49 -05:00
Benoit Jacob	6fd9248c09	add Intel copyright info	2009-12-15 08:43:31 -05:00
Benoit Jacob	4262117f84	backport 4x4 inverse changes: - use cofactors - use Intel's SSE code in the float case	2009-12-15 08:16:48 -05:00
Gael Guennebaud	b581cb870c	fix #74 : sparse triangular solver for lower/row-major matrices	2009-12-14 10:20:35 +01:00
Gael Guennebaud	72fc81dd9d	backport quaternion slerp precision fix	2009-12-05 18:28:17 +01:00
Gael Guennebaud	f36650b00a	fix MSVC10 compilation issues	2009-12-02 19:34:37 +01:00
Benoit Jacob	8d31f58ea1	fix bug #70 Was trying to apply stupid invertibility check to top-left 2x2 corner.	2009-11-26 15:33:07 -05:00
Benoit Jacob	a161a70696	Added tag 2.0.10 for changeset `8f1ce52e76`	2009-11-25 08:54:17 -05:00