From 49034d15709bcad1f5fd3a36a8cd5aa362e7591e Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Wed, 27 Nov 2013 09:46:59 +0100
Subject: [PATCH 01/48] Fix bug #708: add placement new/delete for array

---
 Eigen/src/Core/util/Memory.h | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index a292495e5..d177e8b5a 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -664,7 +664,9 @@ template<typename T> class aligned_stack_memory_handler
       /* memory allocated we can safely let the default implementation handle */ \
       /* this particular case. */ \
       static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
+      static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
       void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
+      void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \
       /* nothrow-new (returns zero instead of std::bad_alloc) */ \
       EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
       void operator delete(void *ptr, const std::nothrow_t&) throw() { \

From d61345f36671d0cc04378bda0f60ca8378d20b4d Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 29 Nov 2013 19:42:11 +0100
Subject: [PATCH 02/48] Fix bug #609: Euler angles are in Range
 [0:pi]x[-pi:pi]x[-pi:pi]. Now the unit test verifies this (also that it is
 bijective in this range).

---
 Eigen/src/Geometry/EulerAngles.h |  2 +-
 test/geo_eulerangles.cpp         | 67 +++++++++++++++++++++-----------
 2 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h
index 97984d590..82802fb43 100644
--- a/Eigen/src/Geometry/EulerAngles.h
+++ b/Eigen/src/Geometry/EulerAngles.h
@@ -28,7 +28,7 @@ namespace Eigen {
   *      * AngleAxisf(ea[2], Vector3f::UnitZ()); \endcode
   * This corresponds to the right-multiply conventions (with right hand side frames).
   * 
-  * The returned angles are in the ranges [0:pi]x[0:pi]x[-pi:pi].
+  * The returned angles are in the ranges [0:pi]x[-pi:pi]x[-pi:pi].
   * 
   * \sa class AngleAxis
   */
diff --git a/test/geo_eulerangles.cpp b/test/geo_eulerangles.cpp
index 5445cd81a..b4830bd41 100644
--- a/test/geo_eulerangles.cpp
+++ b/test/geo_eulerangles.cpp
@@ -12,36 +12,48 @@
 #include <Eigen/LU>
 #include <Eigen/SVD>
 
-template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+
+template<typename Scalar>
+void verify_euler(const Matrix<Scalar,3,1>& ea, int i, int j, int k)
 {
   typedef Matrix<Scalar,3,3> Matrix3;
   typedef Matrix<Scalar,3,1> Vector3;
   typedef AngleAxis<Scalar> AngleAxisx;
   using std::abs;
+  Matrix3 m(AngleAxisx(ea[0], Vector3::Unit(i)) * AngleAxisx(ea[1], Vector3::Unit(j)) * AngleAxisx(ea[2], Vector3::Unit(k)));
+  Vector3 eabis = m.eulerAngles(i, j, k);
+  Matrix3 mbis(AngleAxisx(eabis[0], Vector3::Unit(i)) * AngleAxisx(eabis[1], Vector3::Unit(j)) * AngleAxisx(eabis[2], Vector3::Unit(k))); 
+  VERIFY_IS_APPROX(m,  mbis); 
+  /* If I==K, and ea[1]==0, then there no unique solution. */ 
+  /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */ 
+  if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(M_PI/2),test_precision<Scalar>())) ) 
+    VERIFY((ea-eabis).norm() <= test_precision<Scalar>());
   
-  #define VERIFY_EULER(I,J,K, X,Y,Z) { \
-    Matrix3 m(AngleAxisx(ea[0], Vector3::Unit##X()) * AngleAxisx(ea[1], Vector3::Unit##Y()) * AngleAxisx(ea[2], Vector3::Unit##Z())); \
-    Vector3 eabis = m.eulerAngles(I,J,K); \
-    Matrix3 mbis(AngleAxisx(eabis[0], Vector3::Unit##X()) * AngleAxisx(eabis[1], Vector3::Unit##Y()) * AngleAxisx(eabis[2], Vector3::Unit##Z())); \
-    VERIFY_IS_APPROX(m,  mbis); \
-    /* If I==K, and ea[1]==0, then there no unique solution. */ \
-    /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */ \
-    if( (I!=K || ea[1]!=0) && (I==K || !internal::isApprox(abs(ea[1]),Scalar(M_PI/2),test_precision<Scalar>())) ) VERIFY((ea-eabis).norm() <= test_precision<Scalar>()); \
-  }
-  VERIFY_EULER(0,1,2, X,Y,Z);
-  VERIFY_EULER(0,1,0, X,Y,X);
-  VERIFY_EULER(0,2,1, X,Z,Y);
-  VERIFY_EULER(0,2,0, X,Z,X);
+  // approx_or_less_than does not work for 0
+  VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
+  VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(M_PI));
+  VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(M_PI), eabis[1]);
+  VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(M_PI));
+  VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(M_PI), eabis[2]);
+  VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(M_PI));
+}
 
-  VERIFY_EULER(1,2,0, Y,Z,X);
-  VERIFY_EULER(1,2,1, Y,Z,Y);
-  VERIFY_EULER(1,0,2, Y,X,Z);
-  VERIFY_EULER(1,0,1, Y,X,Y);
+template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+{
+  verify_euler(ea, 0,1,2);
+  verify_euler(ea, 0,1,0);
+  verify_euler(ea, 0,2,1);
+  verify_euler(ea, 0,2,0);
 
-  VERIFY_EULER(2,0,1, Z,X,Y);
-  VERIFY_EULER(2,0,2, Z,X,Z);
-  VERIFY_EULER(2,1,0, Z,Y,X);
-  VERIFY_EULER(2,1,2, Z,Y,Z);
+  verify_euler(ea, 1,2,0);
+  verify_euler(ea, 1,2,1);
+  verify_euler(ea, 1,0,2);
+  verify_euler(ea, 1,0,1);
+
+  verify_euler(ea, 2,0,1);
+  verify_euler(ea, 2,0,2);
+  verify_euler(ea, 2,1,0);
+  verify_euler(ea, 2,1,2);
 }
 
 template<typename Scalar> void eulerangles()
@@ -63,7 +75,16 @@ template<typename Scalar> void eulerangles()
   ea = m.eulerAngles(0,1,0);
   check_all_var(ea);
   
-  ea = (Array3::Random() + Array3(1,1,0))*Scalar(M_PI)*Array3(0.5,0.5,1);
+  // Check with purely random Quaternion:
+  q1.coeffs() = Quaternionx::Coefficients::Random().normalized();
+  m = q1;
+  ea = m.eulerAngles(0,1,2);
+  check_all_var(ea);
+  ea = m.eulerAngles(0,1,0);
+  check_all_var(ea);
+  
+  // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi].
+  ea = (Array3::Random() + Array3(1,0,0))*Scalar(M_PI)*Array3(0.5,1,1);
   check_all_var(ea);
   
   ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(M_PI));

From 276801b25a22ec2fed2bb13d217471e6098aea05 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 29 Nov 2013 19:54:01 +0100
Subject: [PATCH 03/48] Fixed and simplified Matlab code and added further
 block-related examples

---
 doc/AsciiQuickReference.txt | 40 ++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/doc/AsciiQuickReference.txt b/doc/AsciiQuickReference.txt
index fc613864f..d3bfd439c 100644
--- a/doc/AsciiQuickReference.txt
+++ b/doc/AsciiQuickReference.txt
@@ -16,8 +16,8 @@ double s;
 // Basic usage
 // Eigen          // Matlab           // comments
 x.size()          // length(x)        // vector size
-C.rows()          // size(C)(1)       // number of rows
-C.cols()          // size(C)(2)       // number of columns
+C.rows()          // size(C,1)        // number of rows
+C.cols()          // size(C,2)        // number of columns
 x(i)              // x(i+1)           // Matlab is 1-based
 C(i,j)            // C(i+1,j+1)       //
 
@@ -51,20 +51,34 @@ v.setLinSpace(size,low,high)        // v = linspace(low,high,size)'
 // Eigen                           // Matlab
 x.head(n)                          // x(1:n)
 x.head<n>()                        // x(1:n)
-x.tail(n)                          // N = rows(x); x(N - n: N)
-x.tail<n>()                        // N = rows(x); x(N - n: N)
+x.tail(n)                          // x(end - n + 1: end)
+x.tail<n>()                        // x(end - n + 1: end)
 x.segment(i, n)                    // x(i+1 : i+n)
 x.segment<n>(i)                    // x(i+1 : i+n)
 P.block(i, j, rows, cols)          // P(i+1 : i+rows, j+1 : j+cols)
 P.block<rows, cols>(i, j)          // P(i+1 : i+rows, j+1 : j+cols)
+P.row(i)                           // P(i+1, :)
+P.col(j)                           // P(:, j+1)
+P.leftCols<cols>()                 // P(:, 1:cols)
+P.leftCols(cols)                   // P(:, 1:cols)
+P.middleCols<cols>(j)              // P(:, j+1:j+cols)
+P.middleCols(j, cols)              // P(:, j+1:j+cols)
+P.rightCols<cols>()                // P(:, end-cols+1:end)
+P.rightCols(cols)                  // P(:, end-cols+1:end)
+P.topRows<rows>()                  // P(1:rows, :)
+P.topRows(rows)                    // P(1:rows, :)
+P.middleRows<rows>(i)              // P(:, i+1:i+rows)
+P.middleRows(i, rows)              // P(:, i+1:i+rows)
+P.bottomRows<rows>()               // P(:, end-rows+1:end)
+P.bottomRows(rows)                 // P(:, end-rows+1:end)
 P.topLeftCorner(rows, cols)        // P(1:rows, 1:cols)
-P.topRightCorner(rows, cols)       // [m n]=size(P); P(1:rows, n-cols+1:n)
-P.bottomLeftCorner(rows, cols)     // [m n]=size(P); P(m-rows+1:m, 1:cols)
-P.bottomRightCorner(rows, cols)    // [m n]=size(P); P(m-rows+1:m, n-cols+1:n)
+P.topRightCorner(rows, cols)       // P(1:rows, end-cols+1:end)
+P.bottomLeftCorner(rows, cols)     // P(end-rows+1:end, 1:cols)
+P.bottomRightCorner(rows, cols)    // P(end-rows+1:end, end-cols+1:end)
 P.topLeftCorner<rows,cols>()       // P(1:rows, 1:cols)
-P.topRightCorner<rows,cols>()      // [m n]=size(P); P(1:rows, n-cols+1:n)
-P.bottomLeftCorner<rows,cols>()    // [m n]=size(P); P(m-rows+1:m, 1:cols)
-P.bottomRightCorner<rows,cols>()   // [m n]=size(P); P(m-rows+1:m, n-cols+1:n)
+P.topRightCorner<rows,cols>()      // P(1:rows, end-cols+1:end)
+P.bottomLeftCorner<rows,cols>()    // P(end-rows+1:end, 1:cols)
+P.bottomRightCorner<rows,cols>()   // P(end-rows+1:end, end-cols+1:end)
 
 // Of particular note is Eigen's swap function which is highly optimized.
 // Eigen                           // Matlab
@@ -126,10 +140,8 @@ int r, c;
 // Eigen                  // Matlab
 R.minCoeff()              // min(R(:))
 R.maxCoeff()              // max(R(:))
-s = R.minCoeff(&r, &c)    // [aa, bb] = min(R); [cc, dd] = min(aa);
-                          // r = bb(dd); c = dd; s = cc
-s = R.maxCoeff(&r, &c)    // [aa, bb] = max(R); [cc, dd] = max(aa);
-                          // row = bb(dd); col = dd; s = cc
+s = R.minCoeff(&r, &c)    // [s, i] = min(R(:)); [r, c] = ind2sub(size(R), i);
+s = R.maxCoeff(&r, &c)    // [s, i] = max(R(:)); [r, c] = ind2sub(size(R), i);
 R.sum()                   // sum(R(:))
 R.colwise().sum()         // sum(R)
 R.rowwise().sum()         // sum(R, 2) or sum(R')'

From 8a49dd562624de8ae290b4e38705d5d3540d0f0b Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Thu, 19 Dec 2013 11:55:17 +0100
Subject: [PATCH 04/48] Fixed typos in comments

---
 Eigen/src/Geometry/Quaternion.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index d036c018a..a6a8df203 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -194,11 +194,11 @@ public:
   * \brief The quaternion class used to represent 3D orientations and rotations
   *
   * \tparam _Scalar the scalar type, i.e., the type of the coefficients
-  * \tparam _Options controls the memory alignement of the coeffecients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign.
+  * \tparam _Options controls the memory alignment of the coefficients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign.
   *
   * This class represents a quaternion \f$ w+xi+yj+zk \f$ that is a convenient representation of
   * orientations and rotations of objects in three dimensions. Compared to other representations
-  * like Euler angles or 3x3 matrices, quatertions offer the following advantages:
+  * like Euler angles or 3x3 matrices, quaternions offer the following advantages:
   * \li \b compact storage (4 scalars)
   * \li \b efficient to compose (28 flops),
   * \li \b stable spherical interpolation
@@ -385,7 +385,7 @@ class Map<Quaternion<_Scalar>, _Options >
 
     /** Constructs a Mapped Quaternion object from the pointer \a coeffs
       *
-      * The pointer \a coeffs must reference the four coeffecients of Quaternion in the following order:
+      * The pointer \a coeffs must reference the four coefficients of Quaternion in the following order:
       * \code *coeffs == {x, y, z, w} \endcode
       *
       * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
@@ -399,16 +399,16 @@ class Map<Quaternion<_Scalar>, _Options >
 };
 
 /** \ingroup Geometry_Module
-  * Map an unaligned array of single precision scalar as a quaternion */
+  * Map an unaligned array of single precision scalars as a quaternion */
 typedef Map<Quaternion<float>, 0>         QuaternionMapf;
 /** \ingroup Geometry_Module
-  * Map an unaligned array of double precision scalar as a quaternion */
+  * Map an unaligned array of double precision scalars as a quaternion */
 typedef Map<Quaternion<double>, 0>        QuaternionMapd;
 /** \ingroup Geometry_Module
-  * Map a 16-bits aligned array of double precision scalars as a quaternion */
+  * Map a 16-byte aligned array of single precision scalars as a quaternion */
 typedef Map<Quaternion<float>, Aligned>   QuaternionMapAlignedf;
 /** \ingroup Geometry_Module
-  * Map a 16-bits aligned array of double precision scalars as a quaternion */
+  * Map a 16-byte aligned array of double precision scalars as a quaternion */
 typedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;
 
 /***************************************************************************
@@ -579,7 +579,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
   Scalar c = v1.dot(v0);
 
   // if dot == -1, vectors are nearly opposites
-  // => accuraletly compute the rotation axis by computing the
+  // => accurately compute the rotation axis by computing the
   //    intersection of the two planes. This is done by solving:
   //       x^T v0 = 0
   //       x^T v1 = 0

From 1200bd2ef088da242b8e0d34cb189a36e8754336 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 21 Dec 2013 21:46:27 +0100
Subject: [PATCH 05/48] Grafted from
 5725:cdedc9e90d21099e8b3191f95425680ebe710d6f  and resolved conflicts

---
 Eigen/src/plugins/BlockMethods.h | 240 ++++++++++++++++++-------------
 1 file changed, 137 insertions(+), 103 deletions(-)

diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h
index 3bc345211..9b7fdc4aa 100644
--- a/Eigen/src/plugins/BlockMethods.h
+++ b/Eigen/src/plugins/BlockMethods.h
@@ -119,13 +119,13 @@ inline const Block<const Derived, CRows, CCols> topRightCorner() const
 
 /** \returns an expression of a top-right corner of *this.
   *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
   *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
   * information should not contradict. In other words, \a cRows should equal \a CRows unless
   * \a CRows is \a Dynamic, and the same for the number of columns.
   *
@@ -198,13 +198,13 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner() const
 
 /** \returns an expression of a top-left corner of *this.
   *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
   *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
   * information should not contradict. In other words, \a cRows should equal \a CRows unless
   * \a CRows is \a Dynamic, and the same for the number of columns.
   *
@@ -277,13 +277,13 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner() const
 
 /** \returns an expression of a bottom-right corner of *this.
   *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
   *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
   * information should not contradict. In other words, \a cRows should equal \a CRows unless
   * \a CRows is \a Dynamic, and the same for the number of columns.
   *
@@ -356,13 +356,13 @@ inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const
 
 /** \returns an expression of a bottom-left corner of *this.
   *
-  * \tparam CRows number of rows in corner as specified at compile time
-  * \tparam CCols number of columns in corner as specified at compile time
-  * \param  cRows number of rows in corner as specified at run time
-  * \param  cCols number of columns in corner as specified at run time
+  * \tparam CRows number of rows in corner as specified at compile-time
+  * \tparam CCols number of columns in corner as specified at compile-time
+  * \param  cRows number of rows in corner as specified at run-time
+  * \param  cCols number of columns in corner as specified at run-time
   *
-  * This function is mainly useful for corners where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for corners where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
   * information should not contradict. In other words, \a cRows should equal \a CRows unless
   * \a CRows is \a Dynamic, and the same for the number of columns.
   *
@@ -410,7 +410,11 @@ inline ConstRowsBlockXpr topRows(Index n) const
 
 /** \returns a block consisting of the top rows of *this.
   *
-  * \tparam N the number of rows in the block
+  * \tparam N the number of rows in the block as specified at compile-time
+  * \param n the number of rows in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include MatrixBase_template_int_topRows.cpp
   * Output: \verbinclude MatrixBase_template_int_topRows.out
@@ -419,17 +423,17 @@ inline ConstRowsBlockXpr topRows(Index n) const
   */
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename NRowsBlockXpr<N>::Type topRows()
+inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
 {
-  return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
+  return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
 }
 
 /** This is the const version of topRows<int>().*/
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstNRowsBlockXpr<N>::Type topRows() const
+inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
 {
-  return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
 }
 
 
@@ -458,7 +462,11 @@ inline ConstRowsBlockXpr bottomRows(Index n) const
 
 /** \returns a block consisting of the bottom rows of *this.
   *
-  * \tparam N the number of rows in the block
+  * \tparam N the number of rows in the block as specified at compile-time
+  * \param n the number of rows in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include MatrixBase_template_int_bottomRows.cpp
   * Output: \verbinclude MatrixBase_template_int_bottomRows.out
@@ -467,17 +475,17 @@ inline ConstRowsBlockXpr bottomRows(Index n) const
   */
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename NRowsBlockXpr<N>::Type bottomRows()
+inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
 {
-  return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
+  return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
 }
 
 /** This is the const version of bottomRows<int>().*/
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
+inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
 {
-  return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
 }
 
 
@@ -485,7 +493,7 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
 /** \returns a block consisting of a range of rows of *this.
   *
   * \param startRow the index of the first row in the block
-  * \param numRows the number of rows in the block
+  * \param n the number of rows in the block
   *
   * Example: \include DenseBase_middleRows_int.cpp
   * Output: \verbinclude DenseBase_middleRows_int.out
@@ -493,22 +501,26 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 EIGEN_DEVICE_FUNC
-inline RowsBlockXpr middleRows(Index startRow, Index numRows)
+inline RowsBlockXpr middleRows(Index startRow, Index n)
 {
-  return RowsBlockXpr(derived(), startRow, 0, numRows, cols());
+  return RowsBlockXpr(derived(), startRow, 0, n, cols());
 }
 
 /** This is the const version of middleRows(Index,Index).*/
 EIGEN_DEVICE_FUNC
-inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
+inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
 {
-  return ConstRowsBlockXpr(derived(), startRow, 0, numRows, cols());
+  return ConstRowsBlockXpr(derived(), startRow, 0, n, cols());
 }
 
 /** \returns a block consisting of a range of rows of *this.
   *
-  * \tparam N the number of rows in the block
+  * \tparam N the number of rows in the block as specified at compile-time
   * \param startRow the index of the first row in the block
+  * \param n the number of rows in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include DenseBase_template_int_middleRows.cpp
   * Output: \verbinclude DenseBase_template_int_middleRows.out
@@ -517,17 +529,17 @@ inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
   */
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow)
+inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
 {
-  return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
+  return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
 }
 
 /** This is the const version of middleRows<int>().*/
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow) const
+inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
 {
-  return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
 }
 
 
@@ -556,7 +568,11 @@ inline ConstColsBlockXpr leftCols(Index n) const
 
 /** \returns a block consisting of the left columns of *this.
   *
-  * \tparam N the number of columns in the block
+  * \tparam N the number of columns in the block as specified at compile-time
+  * \param n the number of columns in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include MatrixBase_template_int_leftCols.cpp
   * Output: \verbinclude MatrixBase_template_int_leftCols.out
@@ -565,17 +581,17 @@ inline ConstColsBlockXpr leftCols(Index n) const
   */
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename NColsBlockXpr<N>::Type leftCols()
+inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
 {
-  return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
+  return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
 }
 
 /** This is the const version of leftCols<int>().*/
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstNColsBlockXpr<N>::Type leftCols() const
+inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
 {
-  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
 }
 
 
@@ -604,7 +620,11 @@ inline ConstColsBlockXpr rightCols(Index n) const
 
 /** \returns a block consisting of the right columns of *this.
   *
-  * \tparam N the number of columns in the block
+  * \tparam N the number of columns in the block as specified at compile-time
+  * \param n the number of columns in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include MatrixBase_template_int_rightCols.cpp
   * Output: \verbinclude MatrixBase_template_int_rightCols.out
@@ -613,17 +633,17 @@ inline ConstColsBlockXpr rightCols(Index n) const
   */
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename NColsBlockXpr<N>::Type rightCols()
+inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
 {
-  return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
+  return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
 }
 
 /** This is the const version of rightCols<int>().*/
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstNColsBlockXpr<N>::Type rightCols() const
+inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
 {
-  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
 }
 
 
@@ -653,8 +673,12 @@ inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
 
 /** \returns a block consisting of a range of columns of *this.
   *
-  * \tparam N the number of columns in the block
+  * \tparam N the number of columns in the block as specified at compile-time
   * \param startCol the index of the first column in the block
+  * \param n the number of columns in the block as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include DenseBase_template_int_middleCols.cpp
   * Output: \verbinclude DenseBase_template_int_middleCols.out
@@ -663,17 +687,17 @@ inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
   */
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename NColsBlockXpr<N>::Type middleCols(Index startCol)
+inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
 {
-  return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
+  return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
 }
 
 /** This is the const version of middleCols<int>().*/
 template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol) const
+inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
 {
-  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
 }
 
 
@@ -711,15 +735,15 @@ inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, In
 
 /** \returns an expression of a block in *this.
   *
-  * \tparam BlockRows number of rows in block as specified at compile time
-  * \tparam BlockCols number of columns in block as specified at compile time
+  * \tparam BlockRows number of rows in block as specified at compile-time
+  * \tparam BlockCols number of columns in block as specified at compile-time
   * \param  startRow  the first row in the block
   * \param  startCol  the first column in the block
-  * \param  blockRows number of rows in block as specified at run time
-  * \param  blockCols number of columns in block as specified at run time
+  * \param  blockRows number of rows in block as specified at run-time
+  * \param  blockCols number of columns in block as specified at run-time
   *
-  * This function is mainly useful for blocks where the number of rows is specified at compile time
-  * and the number of columns is specified at run time, or vice versa. The compile-time and run-time
+  * This function is mainly useful for blocks where the number of rows is specified at compile-time
+  * and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
   * information should not contradict. In other words, \a blockRows should equal \a BlockRows unless
   * \a BlockRows is \a Dynamic, and the same for the number of columns.
   *
@@ -786,7 +810,7 @@ inline ConstRowXpr row(Index i) const
   * \only_for_vectors
   *
   * \param start the first coefficient in the segment
-  * \param vecSize the number of coefficients in the segment
+  * \param n the number of coefficients in the segment
   *
   * Example: \include MatrixBase_segment_int_int.cpp
   * Output: \verbinclude MatrixBase_segment_int_int.out
@@ -798,26 +822,26 @@ inline ConstRowXpr row(Index i) const
   * \sa class Block, segment(Index)
   */
 EIGEN_DEVICE_FUNC
-inline SegmentReturnType segment(Index start, Index vecSize)
+inline SegmentReturnType segment(Index start, Index n)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return SegmentReturnType(derived(), start, vecSize);
+  return SegmentReturnType(derived(), start, n);
 }
 
 
 /** This is the const version of segment(Index,Index).*/
 EIGEN_DEVICE_FUNC
-inline ConstSegmentReturnType segment(Index start, Index vecSize) const
+inline ConstSegmentReturnType segment(Index start, Index n) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return ConstSegmentReturnType(derived(), start, vecSize);
+  return ConstSegmentReturnType(derived(), start, n);
 }
 
 /** \returns a dynamic-size expression of the first coefficients of *this.
   *
   * \only_for_vectors
   *
-  * \param vecSize the number of coefficients in the block
+  * \param n the number of coefficients in the segment
   *
   * Example: \include MatrixBase_start_int.cpp
   * Output: \verbinclude MatrixBase_start_int.out
@@ -829,26 +853,25 @@ inline ConstSegmentReturnType segment(Index start, Index vecSize) const
   * \sa class Block, block(Index,Index)
   */
 EIGEN_DEVICE_FUNC
-inline SegmentReturnType head(Index vecSize)
+inline SegmentReturnType head(Index n)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return SegmentReturnType(derived(), 0, vecSize);
+  return SegmentReturnType(derived(), 0, n);
 }
 
 /** This is the const version of head(Index).*/
 EIGEN_DEVICE_FUNC
-inline ConstSegmentReturnType
-  head(Index vecSize) const
+inline ConstSegmentReturnType head(Index n) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return ConstSegmentReturnType(derived(), 0, vecSize);
+  return ConstSegmentReturnType(derived(), 0, n);
 }
 
 /** \returns a dynamic-size expression of the last coefficients of *this.
   *
   * \only_for_vectors
   *
-  * \param vecSize the number of coefficients in the block
+  * \param n the number of coefficients in the segment
   *
   * Example: \include MatrixBase_end_int.cpp
   * Output: \verbinclude MatrixBase_end_int.out
@@ -860,102 +883,113 @@ inline ConstSegmentReturnType
   * \sa class Block, block(Index,Index)
   */
 EIGEN_DEVICE_FUNC
-inline SegmentReturnType tail(Index vecSize)
+inline SegmentReturnType tail(Index n)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return SegmentReturnType(derived(), this->size() - vecSize, vecSize);
+  return SegmentReturnType(derived(), this->size() - n, n);
 }
 
 /** This is the const version of tail(Index).*/
 EIGEN_DEVICE_FUNC
-inline ConstSegmentReturnType tail(Index vecSize) const
+inline ConstSegmentReturnType tail(Index n) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return ConstSegmentReturnType(derived(), this->size() - vecSize, vecSize);
+  return ConstSegmentReturnType(derived(), this->size() - n, n);
 }
 
 /** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
   *
   * \only_for_vectors
   *
-  * The template parameter \a Size is the number of coefficients in the block
+  * \tparam N the number of coefficients in the segment as specified at compile-time
+  * \param start the index of the first element in the segment
+  * \param n the number of coefficients in the segment as specified at compile-time
   *
-  * \param start the index of the first element of the sub-vector
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include MatrixBase_template_int_segment.cpp
   * Output: \verbinclude MatrixBase_template_int_segment.out
   *
   * \sa class Block
   */
-template<int Size>
+template<int N>
 EIGEN_DEVICE_FUNC
-inline typename FixedSegmentReturnType<Size>::Type segment(Index start)
+inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename FixedSegmentReturnType<Size>::Type(derived(), start);
+  return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
 }
 
 /** This is the const version of segment<int>(Index).*/
-template<int Size>
+template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) const
+inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename ConstFixedSegmentReturnType<Size>::Type(derived(), start);
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
 }
 
 /** \returns a fixed-size expression of the first coefficients of *this.
   *
   * \only_for_vectors
   *
-  * The template parameter \a Size is the number of coefficients in the block
+  * \tparam N the number of coefficients in the segment as specified at compile-time
+  * \param  n the number of coefficients in the segment as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include MatrixBase_template_int_start.cpp
   * Output: \verbinclude MatrixBase_template_int_start.out
   *
   * \sa class Block
   */
-template<int Size>
+template<int N>
 EIGEN_DEVICE_FUNC
-inline typename FixedSegmentReturnType<Size>::Type head()
+inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename FixedSegmentReturnType<Size>::Type(derived(), 0);
+  return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
 }
 
 /** This is the const version of head<int>().*/
-template<int Size>
+template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstFixedSegmentReturnType<Size>::Type head() const
+inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename ConstFixedSegmentReturnType<Size>::Type(derived(), 0);
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
 }
 
 /** \returns a fixed-size expression of the last coefficients of *this.
   *
   * \only_for_vectors
   *
-  * The template parameter \a Size is the number of coefficients in the block
+  * \tparam N the number of coefficients in the segment as specified at compile-time
+  * \param  n the number of coefficients in the segment as specified at run-time
+  *
+  * The compile-time and run-time information should not contradict. In other words,
+  * \a n should equal \a N unless \a N is \a Dynamic.
   *
   * Example: \include MatrixBase_template_int_end.cpp
   * Output: \verbinclude MatrixBase_template_int_end.out
   *
   * \sa class Block
   */
-template<int Size>
+template<int N>
 EIGEN_DEVICE_FUNC
-inline typename FixedSegmentReturnType<Size>::Type tail()
+inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename FixedSegmentReturnType<Size>::Type(derived(), size() - Size);
+  return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
 }
 
 /** This is the const version of tail<int>.*/
-template<int Size>
+template<int N>
 EIGEN_DEVICE_FUNC
-inline typename ConstFixedSegmentReturnType<Size>::Type tail() const
+inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  return typename ConstFixedSegmentReturnType<Size>::Type(derived(), size() - Size);
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
 }

From bbf373bbe9306c701c2488bb6ea05600cf63239e Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 21 Dec 2013 22:14:03 +0100
Subject: [PATCH 06/48] Applied patch from Richard JW Roberts, resolving bug
 #704

---
 Eigen/src/QR/HouseholderQR.h     | 94 +++++++++++++++++---------------
 Eigen/src/QR/HouseholderQR_MKL.h | 26 +++++----
 2 files changed, 64 insertions(+), 56 deletions(-)

diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index ad156396a..352dbf3f0 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -251,56 +251,62 @@ void householder_qr_inplace_unblocked(MatrixQR& mat, HCoeffs& hCoeffs, typename
 }
 
 /** \internal */
-template<typename MatrixQR, typename HCoeffs>
-void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs,
-                                       typename MatrixQR::Index maxBlockSize=32,
-                                       typename MatrixQR::Scalar* tempData = 0)
+template<typename MatrixQR, typename HCoeffs,
+  typename MatrixQRScalar = typename MatrixQR::Scalar,
+  bool InnerStrideIsOne = (MatrixQR::InnerStrideAtCompileTime == 1 && HCoeffs::InnerStrideAtCompileTime == 1)>
+struct householder_qr_inplace_blocked
 {
-  typedef typename MatrixQR::Index Index;
-  typedef typename MatrixQR::Scalar Scalar;
-  typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
-
-  Index rows = mat.rows();
-  Index cols = mat.cols();
-  Index size = (std::min)(rows, cols);
-
-  typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;
-  TempType tempVector;
-  if(tempData==0)
+  // This is specialized for MKL-supported Scalar types in HouseholderQR_MKL.h
+  static void run(MatrixQR& mat, HCoeffs& hCoeffs,
+      typename MatrixQR::Index maxBlockSize=32,
+      typename MatrixQR::Scalar* tempData = 0)
   {
-    tempVector.resize(cols);
-    tempData = tempVector.data();
-  }
+    typedef typename MatrixQR::Index Index;
+    typedef typename MatrixQR::Scalar Scalar;
+    typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
 
-  Index blockSize = (std::min)(maxBlockSize,size);
+    Index rows = mat.rows();
+    Index cols = mat.cols();
+    Index size = (std::min)(rows, cols);
 
-  Index k = 0;
-  for (k = 0; k < size; k += blockSize)
-  {
-    Index bs = (std::min)(size-k,blockSize);  // actual size of the block
-    Index tcols = cols - k - bs;            // trailing columns
-    Index brows = rows-k;                   // rows of the block
-
-    // partition the matrix:
-    //        A00 | A01 | A02
-    // mat  = A10 | A11 | A12
-    //        A20 | A21 | A22
-    // and performs the qr dec of [A11^T A12^T]^T
-    // and update [A21^T A22^T]^T using level 3 operations.
-    // Finally, the algorithm continue on A22
-
-    BlockType A11_21 = mat.block(k,k,brows,bs);
-    Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);
-
-    householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);
-
-    if(tcols)
+    typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;
+    TempType tempVector;
+    if(tempData==0)
     {
-      BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
-      apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
+      tempVector.resize(cols);
+      tempData = tempVector.data();
+    }
+
+    Index blockSize = (std::min)(maxBlockSize,size);
+
+    Index k = 0;
+    for (k = 0; k < size; k += blockSize)
+    {
+      Index bs = (std::min)(size-k,blockSize);  // actual size of the block
+      Index tcols = cols - k - bs;            // trailing columns
+      Index brows = rows-k;                   // rows of the block
+
+      // partition the matrix:
+      //        A00 | A01 | A02
+      // mat  = A10 | A11 | A12
+      //        A20 | A21 | A22
+      // and performs the qr dec of [A11^T A12^T]^T
+      // and update [A21^T A22^T]^T using level 3 operations.
+      // Finally, the algorithm continue on A22
+
+      BlockType A11_21 = mat.block(k,k,brows,bs);
+      Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);
+
+      householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);
+
+      if(tcols)
+      {
+        BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
+        apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
+      }
     }
   }
-}
+};
 
 template<typename _MatrixType, typename Rhs>
 struct solve_retval<HouseholderQR<_MatrixType>, Rhs>
@@ -352,7 +358,7 @@ HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType&
 
   m_temp.resize(cols);
 
-  internal::householder_qr_inplace_blocked(m_qr, m_hCoeffs, 48, m_temp.data());
+  internal::householder_qr_inplace_blocked<MatrixType, HCoeffsType>::run(m_qr, m_hCoeffs, 48, m_temp.data());
 
   m_isInitialized = true;
   return *this;
diff --git a/Eigen/src/QR/HouseholderQR_MKL.h b/Eigen/src/QR/HouseholderQR_MKL.h
index 5313de604..8a3a7e406 100644
--- a/Eigen/src/QR/HouseholderQR_MKL.h
+++ b/Eigen/src/QR/HouseholderQR_MKL.h
@@ -34,7 +34,7 @@
 #ifndef EIGEN_QR_MKL_H
 #define EIGEN_QR_MKL_H
 
-#include "Eigen/src/Core/util/MKL_support.h"
+#include "../Core/util/MKL_support.h"
 
 namespace Eigen { 
 
@@ -44,18 +44,20 @@ namespace internal {
 
 #define EIGEN_MKL_QR_NOPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \
 template<typename MatrixQR, typename HCoeffs> \
-void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs, \
-                                       typename MatrixQR::Index maxBlockSize=32, \
-                                       EIGTYPE* tempData = 0) \
+struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> \
 { \
-  lapack_int m = mat.rows(); \
-  lapack_int n = mat.cols(); \
-  lapack_int lda = mat.outerStride(); \
-  lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
-  LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \
-  hCoeffs.adjointInPlace(); \
-\
-}
+  static void run(MatrixQR& mat, HCoeffs& hCoeffs, \
+      typename MatrixQR::Index = 32, \
+      typename MatrixQR::Scalar* = 0) \
+  { \
+    lapack_int m = (lapack_int) mat.rows(); \
+    lapack_int n = (lapack_int) mat.cols(); \
+    lapack_int lda = (lapack_int) mat.outerStride(); \
+    lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \
+    hCoeffs.adjointInPlace(); \
+  } \
+};
 
 EIGEN_MKL_QR_NOPIV(double, double, d)
 EIGEN_MKL_QR_NOPIV(float, float, s)

From e15cb9f4f83e67096f8f2443afadebba9efab0b4 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sat, 11 Jan 2014 20:04:36 +0100
Subject: [PATCH 07/48] Make geo_hyperplane unit test more stable (bug #539)

---
 test/geo_hyperplane.cpp | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp
index f26fc1329..befd7d483 100644
--- a/test/geo_hyperplane.cpp
+++ b/test/geo_hyperplane.cpp
@@ -18,10 +18,12 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
   /* this test covers the following files:
      Hyperplane.h
   */
+  using std::abs;
   typedef typename HyperplaneType::Index Index;
   const Index dim = _plane.dim();
   enum { Options = HyperplaneType::Options };
   typedef typename HyperplaneType::Scalar Scalar;
+  typedef typename HyperplaneType::RealScalar RealScalar;
   typedef Matrix<Scalar, HyperplaneType::AmbientDimAtCompileTime, 1> VectorType;
   typedef Matrix<Scalar, HyperplaneType::AmbientDimAtCompileTime,
                          HyperplaneType::AmbientDimAtCompileTime> MatrixType;
@@ -42,7 +44,10 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
   VERIFY_IS_APPROX( n1.dot(n1), Scalar(1) );
 
   VERIFY_IS_MUCH_SMALLER_THAN( pl0.absDistance(p0), Scalar(1) );
-  VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0 );
+  if(numext::abs2(s0)>RealScalar(1e-6))
+    VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0);
+  else
+    VERIFY_IS_MUCH_SMALLER_THAN( abs(pl1.signedDistance(p1 + n1 * s0) - s0), Scalar(1) );
   VERIFY_IS_MUCH_SMALLER_THAN( pl1.signedDistance(pl1.projection(p0)), Scalar(1) );
   VERIFY_IS_MUCH_SMALLER_THAN( pl1.absDistance(p1 +  pl1.normal().unitOrthogonal() * s1), Scalar(1) );
 
@@ -52,6 +57,8 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
     MatrixType rot = MatrixType::Random(dim,dim).householderQr().householderQ();
     DiagonalMatrix<Scalar,HyperplaneType::AmbientDimAtCompileTime> scaling(VectorType::Random());
     Translation<Scalar,HyperplaneType::AmbientDimAtCompileTime> translation(VectorType::Random());
+    
+    while(scaling.diagonal().cwiseAbs().minCoeff()<RealScalar(1e-4)) scaling.diagonal() = VectorType::Random();
 
     pl2 = pl1;
     VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot).absDistance(rot * p1), Scalar(1) );
@@ -61,7 +68,7 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
     VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling).absDistance((rot*scaling) * p1), Scalar(1) );
     pl2 = pl1;
     VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling*translation)
-                                 .absDistance((rot*scaling*translation) * p1), Scalar(1) );
+                                  .absDistance((rot*scaling*translation) * p1), Scalar(1) );
     pl2 = pl1;
     VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*translation,Isometry)
                                  .absDistance((rot*translation) * p1), Scalar(1) );
@@ -104,12 +111,15 @@ template<typename Scalar> void lines()
     Vector result = line_u.intersection(line_v);
 
     // the lines should intersect at the point we called "center"
-    VERIFY_IS_APPROX(result, center);
+    if(abs(a-1) > 1e-2 && abs(v.normalized().dot(u.normalized()))<0.9)
+      VERIFY_IS_APPROX(result, center);
 
     // check conversions between two types of lines
     PLine pl(line_u); // gcc 3.3 will commit suicide if we don't name this variable
-    CoeffsType converted_coeffs = HLine(pl).coeffs();
-    converted_coeffs *= (line_u.coeffs()[0])/(converted_coeffs[0]);
+    HLine line_u2(pl);
+    CoeffsType converted_coeffs = line_u2.coeffs();
+    if(line_u2.normal().dot(line_u.normal())<0.)
+      converted_coeffs = -line_u2.coeffs();
     VERIFY(line_u.coeffs().isApprox(converted_coeffs));
   }
 }

From 548216b7ca396dea5ecd55e177e38de0f80e8396 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sun, 12 Jan 2014 11:09:06 +0100
Subject: [PATCH 08/48] QuaternionBase::slerp was documented twice and one
 explanation was ambiguous.

---
 Eigen/src/Geometry/Quaternion.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index a6a8df203..803001272 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -150,10 +150,6 @@ public:
   /** \returns the conjugated quaternion */
   Quaternion<Scalar> conjugate() const;
 
-  /** \returns an interpolation for a constant motion between \a other and \c *this
-    * \a t in [0;1]
-    * see http://en.wikipedia.org/wiki/Slerp
-    */
   template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
 
   /** \returns \c true if \c *this is approximately equal to \a other, within the precision
@@ -677,8 +673,13 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
   return static_cast<Scalar>(2 * acos(d));
 }
 
+ 
+    
 /** \returns the spherical linear interpolation between the two quaternions
-  * \c *this and \a other at the parameter \a t
+  * \c *this and \a other at the parameter \a t in [0;1].
+  * 
+  * This represents an interpolation for a constant motion between \c *this and \a other,
+  * see also http://en.wikipedia.org/wiki/Slerp.
   */
 template <class Derived>
 template <class OtherDerived>

From 4cd4be97a7165e6e45ee60aee23b9342af03c491 Mon Sep 17 00:00:00 2001
From: Anton Gladky <gladk@debian.org>
Date: Wed, 15 Jan 2014 17:49:52 +0100
Subject: [PATCH 09/48] Port unsupported constrained CG to Eigen3

---
 .../Eigen/src/IterativeSolvers/ConstrainedConjGrad.h   | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
index 3f18beeeb..dc0093eb9 100644
--- a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
+++ b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
@@ -58,7 +58,9 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV)
   Scalar rho, rho_1, alpha;
   d.setZero();
 
-  CINV.startFill(); // FIXME estimate the number of non-zeros
+  typedef Triplet<double> T;
+  std::vector<T> tripletList;
+    
   for (Index i = 0; i < rows; ++i)
   {
     d[i] = 1.0;
@@ -84,11 +86,12 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV)
     // FIXME add a generic "prune/filter" expression for both dense and sparse object to sparse
     for (Index j=0; j<l.size(); ++j)
       if (l[j]<1e-15)
-        CINV.fill(i,j) = l[j];
+	tripletList.push_back(T(i,j,l(j)));
 
+	
     d[i] = 0.0;
   }
-  CINV.endFill();
+  CINV.setFromTriplets(tripletList.begin(), tripletList.end());
 }
 
 
@@ -103,6 +106,7 @@ template<typename TMatrix, typename CMatrix,
 void constrained_cg(const TMatrix& A, const CMatrix& C, VectorX& x,
                        const VectorB& b, const VectorF& f, IterationController &iter)
 {
+  using std::sqrt;
   typedef typename TMatrix::Scalar Scalar;
   typedef typename TMatrix::Index Index;
   typedef Matrix<Scalar,Dynamic,1>  TmpVec;

From a58325ac2f7be7326be358ac51c4f0eebcf7fbf9 Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Tue, 31 Dec 2013 18:06:28 +0000
Subject: [PATCH 10/48] Minor corrections in QR docs.

---
 Eigen/QR                            | 4 +++-
 Eigen/src/QR/FullPivHouseholderQR.h | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/Eigen/QR b/Eigen/QR
index ac5b02693..8c7c6162e 100644
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -15,7 +15,9 @@
   *
   * This module provides various QR decompositions
   * This module also provides some MatrixBase methods, including:
-  *  - MatrixBase::qr(),
+  *  - MatrixBase::householderQr()
+  *  - MatrixBase::colPivHouseholderQr()
+  *  - MatrixBase::fullPivHouseholderQr()
   *
   * \code
   * #include <Eigen/QR>
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index e6ab172b3..44eaa1b1a 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -33,13 +33,13 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
   *
   * \param MatrixType the type of the matrix of which we are computing the QR decomposition
   *
-  * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R
+  * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R
   * such that 
   * \f[
-  *  \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \mathbf{R}
+  *  \mathbf{P} \, \mathbf{A} \, \mathbf{P}' = \mathbf{Q} \, \mathbf{R}
   * \f]
-  * by using Householder transformations. Here, \b P is a permutation matrix, \b Q a unitary matrix and \b R an 
-  * upper triangular matrix.
+  * by using Householder transformations. Here, \b P and \b P' are permutation matrices, \b Q a unitary matrix 
+  * and \b R an upper triangular matrix.
   *
   * This decomposition performs a very prudent full pivoting in order to be rank-revealing and achieve optimal
   * numerical stability. The trade-off is that it is slower than HouseholderQR and ColPivHouseholderQR.

From a6a57748ddff8ffc7e00e256385bd83423f68acb Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sun, 5 Jan 2014 14:24:41 +0100
Subject: [PATCH 11/48] Fix typo

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4135339b1..a4731c8c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -204,7 +204,7 @@ if(NOT MSVC)
 
   option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
   if(EIGEN_TEST_NEON)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a"8)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
     message(STATUS "Enabling NEON in tests/examples")
   endif()
 

From ac409f51f185a4654f9a5c38ab5d1652824836f6 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Tue, 7 Jan 2014 20:17:59 +0100
Subject: [PATCH 12/48] Document the fact that Random and setRandom are not
 reentrant (so not thread-safe)

---
 Eigen/src/Core/Random.h     | 22 ++++++++++++++++------
 doc/Doxyfile.in             |  2 ++
 doc/TopicMultithreading.dox |  2 ++
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h
index a0bd3039e..a4492ddb6 100644
--- a/Eigen/src/Core/Random.h
+++ b/Eigen/src/Core/Random.h
@@ -34,6 +34,8 @@ struct functor_traits<scalar_random_op<Scalar> >
   * The parameters \a rows and \a cols are the number of rows and of columns of
   * the returned matrix. Must be compatible with this MatrixBase type.
   *
+  * \not_reentrant
+  * 
   * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
   * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
   * instead.
@@ -45,8 +47,8 @@ struct functor_traits<scalar_random_op<Scalar> >
   * This expression has the "evaluate before nesting" flag so that it will be evaluated into
   * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
   * behavior with expressions involving random matrices.
-  *
-  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
+  * 
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
   */
 template<typename Derived>
 inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
@@ -64,6 +66,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
   * Must be compatible with this MatrixBase type.
   *
   * \only_for_vectors
+  * \not_reentrant
   *
   * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
   * it is redundant to pass \a size as argument, so Random() should be used
@@ -76,7 +79,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
   * a temporary vector whenever it is nested in a larger expression. This prevents unexpected
   * behavior with expressions involving random matrices.
   *
-  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
   */
 template<typename Derived>
 inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
@@ -99,8 +102,10 @@ DenseBase<Derived>::Random(Index size)
   * This expression has the "evaluate before nesting" flag so that it will be evaluated into
   * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
   * behavior with expressions involving random matrices.
+  * 
+  * \not_reentrant
   *
-  * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
   */
 template<typename Derived>
 inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
@@ -114,6 +119,8 @@ DenseBase<Derived>::Random()
   * Numbers are uniformly spread through their whole definition range for integer types,
   * and in the [-1:1] range for floating point scalar types.
   * 
+  * \not_reentrant
+  * 
   * Example: \include MatrixBase_setRandom.cpp
   * Output: \verbinclude MatrixBase_setRandom.out
   *
@@ -131,11 +138,12 @@ inline Derived& DenseBase<Derived>::setRandom()
   * and in the [-1:1] range for floating point scalar types.
   * 
   * \only_for_vectors
+  * \not_reentrant
   *
   * Example: \include Matrix_setRandom_int.cpp
   * Output: \verbinclude Matrix_setRandom_int.out
   *
-  * \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random()
+  * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
   */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
@@ -150,13 +158,15 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
   * Numbers are uniformly spread through their whole definition range for integer types,
   * and in the [-1:1] range for floating point scalar types.
   *
+  * \not_reentrant
+  * 
   * \param nbRows the new number of rows
   * \param nbCols the new number of columns
   *
   * Example: \include Matrix_setRandom_int_int.cpp
   * Output: \verbinclude Matrix_setRandom_int_int.out
   *
-  * \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random()
+  * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
   */
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 1a2603b04..85af9f1d4 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -206,6 +206,7 @@ TAB_SIZE               = 8
 # You can put \n's in the value part of an alias to insert newlines.
 
 ALIASES                = "only_for_vectors=This is only for vectors (either row-vectors or column-vectors), i.e. matrices which are known at compile-time to have either one row or one column." \
+			 "not_reentrant=\warning This function is not re-entrant." \
                          "array_module=This is defined in the %Array module. \code #include <Eigen/Array> \endcode" \
                          "cholesky_module=This is defined in the %Cholesky module. \code #include <Eigen/Cholesky> \endcode" \
                          "eigenvalues_module=This is defined in the %Eigenvalues module. \code #include <Eigen/Eigenvalues> \endcode" \
@@ -223,6 +224,7 @@ ALIASES                = "only_for_vectors=This is only for vectors (either row-
                          "note_about_checking_solutions=This method just tries to find as good a solution as possible. If you want to check whether a solution exists or if it is accurate, just call this function to get a result and then compute the error of this result, or use MatrixBase::isApprox() directly, for instance like this: \code bool a_solution_exists = (A*result).isApprox(b, precision); \endcode This method avoids dividing by zero, so that the non-existence of a solution doesn't by itself mean that you'll get \c inf or \c nan values." \
                          "note_try_to_help_rvo=This function returns the result by value. In order to make that efficient, it is implemented as just a return statement using a special constructor, hopefully allowing the compiler to perform a RVO (return value optimization)." \
                          "nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"
+                         
 
 ALIASES += "eigenAutoToc=  "
 ALIASES += "eigenManualPage=\defgroup"
diff --git a/doc/TopicMultithreading.dox b/doc/TopicMultithreading.dox
index f7d082668..fb944af29 100644
--- a/doc/TopicMultithreading.dox
+++ b/doc/TopicMultithreading.dox
@@ -39,6 +39,8 @@ int main(int argc, char** argv)
 }
 \endcode
 
+\warning note that all functions generating random matrices are \b not re-entrant nor thread-safe. Those include DenseBase::Random(), and DenseBase::setRandom() despite a call to Eigen::initParallel(). This is because these functions are based on std::rand which is not re-entrant. For thread-safe random generator, we recommend the use of boost::random of c++11 random feature.
+
 In the case your application is parallelized with OpenMP, you might want to disable Eigen's own parallization as detailed in the previous section.
 
 */

From 92190a1cafc3a8613e1200cd4ed81dbc509a93bd Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Tue, 7 Jan 2014 20:23:35 +0100
Subject: [PATCH 13/48] Add an example showing how to use C++11 random
 distributions

---
 Eigen/src/Core/CwiseNullaryOp.h       |  3 +++
 Eigen/src/Core/Random.h               |  2 ++
 doc/special_examples/CMakeLists.txt   | 19 +++++++++++++++++++
 doc/special_examples/random_cpp11.cpp | 14 ++++++++++++++
 4 files changed, 38 insertions(+)
 create mode 100644 doc/special_examples/random_cpp11.cpp

diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 1d4ee50a8..124383114 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -138,6 +138,9 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
   *
   * The template parameter \a CustomNullaryOp is the type of the functor.
   *
+  * Here is an example with C++11 random generators: \include random_cpp11.cpp
+  * Output: \verbinclude random_cpp11.out
+  * 
   * \sa class CwiseNullaryOp
   */
 template<typename Derived>
diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h
index a4492ddb6..fdd43ed0c 100644
--- a/Eigen/src/Core/Random.h
+++ b/Eigen/src/Core/Random.h
@@ -48,6 +48,8 @@ struct functor_traits<scalar_random_op<Scalar> >
   * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
   * behavior with expressions involving random matrices.
   * 
+  * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
+  *
   * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
   */
 template<typename Derived>
diff --git a/doc/special_examples/CMakeLists.txt b/doc/special_examples/CMakeLists.txt
index 0c9b3c3ba..45e2339e3 100644
--- a/doc/special_examples/CMakeLists.txt
+++ b/doc/special_examples/CMakeLists.txt
@@ -19,3 +19,22 @@ if(QT4_FOUND)
                      
   add_dependencies(all_examples Tutorial_sparse_example)
 endif(QT4_FOUND)
+
+check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
+if(EIGEN_COMPILER_SUPPORT_CPP11)
+  add_executable(random_cpp11 random_cpp11.cpp)
+  target_link_libraries(random_cpp11 ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
+  add_dependencies(all_examples random_cpp11)
+  ei_add_target_property(random_cpp11 COMPILE_FLAGS "-std=c++11")
+  
+  get_target_property(random_cpp11_exec
+                      random_cpp11 LOCATION)
+  add_custom_command(
+    TARGET random_cpp11
+    POST_BUILD
+    COMMAND ${random_cpp11_exec}
+    ARGS >${CMAKE_CURRENT_BINARY_DIR}/random_cpp11.out
+  )
+  
+  
+endif()
\ No newline at end of file
diff --git a/doc/special_examples/random_cpp11.cpp b/doc/special_examples/random_cpp11.cpp
new file mode 100644
index 000000000..ccd7c77d0
--- /dev/null
+++ b/doc/special_examples/random_cpp11.cpp
@@ -0,0 +1,14 @@
+#include <Eigen/Core>
+#include <iostream>
+#include <random>
+
+using namespace Eigen;
+
+int main() {
+  std::default_random_engine generator;
+  std::poisson_distribution<int> distribution(4.1);
+  auto poisson = [&] (int) {return distribution(generator);};
+
+  RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson );
+  std::cout << v << "\n";
+}

From 3a4616d6e3d7d60af5ba569b0d36e954d45a6fb9 Mon Sep 17 00:00:00 2001
From: Martinho Fernandes <martinho.fernandes@gmail.com>
Date: Fri, 10 Jan 2014 11:02:11 +0100
Subject: [PATCH 14/48] Add C++11 allocator overloads to avoid implicit
 conversions.

---
 Eigen/src/Core/util/Memory.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index d177e8b5a..c1050e1ea 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -761,11 +761,27 @@ public:
         ::new( p ) T( value );
     }
 
+#if (__cplusplus >= 201103L)
+    template <typename U, typename... Args>
+    void construct( U* u, Args&&... args)
+    {
+        ::new( u ) U( std::forward<Args>( args )... );
+    }
+#endif
+
     void destroy( pointer p )
     {
         p->~T();
     }
 
+#if (__cplusplus >= 201103L)
+    template <typename U>
+    void destroy( U* u )
+    {
+        u->~U();
+    }
+#endif
+
     void deallocate( pointer p, size_type /*num*/ )
     {
         internal::aligned_free( p );

From 4ccff2d028e9e39c66c4f003c7c077a0805d3023 Mon Sep 17 00:00:00 2001
From: Martinho Fernandes <martinho.fernandes@gmail.com>
Date: Fri, 10 Jan 2014 11:20:40 +0100
Subject: [PATCH 15/48] Placement new must use void* to avoid user-specific
 overloads.

---
 Eigen/src/Core/util/Memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index c1050e1ea..d1b08ed4c 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -765,7 +765,7 @@ public:
     template <typename U, typename... Args>
     void construct( U* u, Args&&... args)
     {
-        ::new( u ) U( std::forward<Args>( args )... );
+        ::new( static_cast<void*>(u) ) U( std::forward<Args>( args )... );
     }
 #endif
 

From aa0db35185f7eda94eb103b6bb92630c432512e5 Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Sat, 18 Jan 2014 01:16:17 +0000
Subject: [PATCH 16/48] Add doc page on computing Least Squares.

---
 doc/LeastSquares.dox                         | 70 ++++++++++++++++++++
 doc/Manual.dox                               |  2 +
 doc/TutorialLinearAlgebra.dox                | 11 +--
 doc/snippets/LeastSquaresNormalEquations.cpp |  4 ++
 doc/snippets/LeastSquaresQR.cpp              |  4 ++
 5 files changed, 86 insertions(+), 5 deletions(-)
 create mode 100644 doc/LeastSquares.dox
 create mode 100644 doc/snippets/LeastSquaresNormalEquations.cpp
 create mode 100644 doc/snippets/LeastSquaresQR.cpp

diff --git a/doc/LeastSquares.dox b/doc/LeastSquares.dox
new file mode 100644
index 000000000..e2191a22f
--- /dev/null
+++ b/doc/LeastSquares.dox
@@ -0,0 +1,70 @@
+namespace Eigen {
+
+/** \eigenManualPage LeastSquares Solving linear least squares systems
+
+This page describes how to solve linear least squares systems using %Eigen. An overdetermined system
+of equations, say \a Ax = \a b, has no solutions. In this case, it makes sense to search for the
+vector \a x which is closest to being a solution, in the sense that the difference \a Ax - \a b is
+as small as possible. This \a x is called the least square solution (if the Euclidean norm is used).
+
+The three methods discussed on this page are the SVD decomposition, the QR decomposition and normal
+equations. Of these, the SVD decomposition is generally the most accurate but the slowest, normal
+equations is the fastest but least accurate, and the QR decomposition is in between.
+
+\eigenAutoToc
+
+
+\section LeastSquaresSVD Using the SVD decomposition
+
+The \link JacobiSVD::solve() solve() \endlink method in the JacobiSVD class can be directly used to
+solve linear squares systems. It is not enough to compute only the singular values (the default for
+this class); you also need the singular vectors but the thin SVD decomposition suffices for
+computing least squares solutions:
+
+<table class="example">
+<tr><th>Example:</th><th>Output:</th></tr>
+<tr>
+  <td>\include TutorialLinAlgSVDSolve.cpp </td>
+  <td>\verbinclude TutorialLinAlgSVDSolve.out </td>
+</tr>
+</table>
+
+This is example from the page \link TutorialLinearAlgebra Linear algebra and decompositions \endlink.
+
+
+\section LeastSquaresQR Using the QR decomposition
+
+The solve() method in QR decomposition classes also computes the least squares solution. There are
+three QR decomposition classes: HouseholderQR (no pivoting, so fast but unstable),
+ColPivHouseholderQR (column pivoting, thus a bit slower but more accurate) and FullPivHouseholderQR
+(full pivoting, so slowest and most stable). Here is an example with column pivoting:
+
+<table class="example">
+<tr><th>Example:</th><th>Output:</th></tr>
+<tr>
+  <td>\include LeastSquaresQR.cpp </td>
+  <td>\verbinclude LeastSquaresQR.out </td>
+</tr>
+</table>
+
+
+\section LeastSquaresNormalEquations Using normal equations
+
+Finding the least squares solution of \a Ax = \a b is equivalent to solving the normal equation
+<i>A</i><sup>T</sup><i>Ax</i> = <i>A</i><sup>T</sup><i>b</i>. This leads to the following code
+
+<table class="example">
+<tr><th>Example:</th><th>Output:</th></tr>
+<tr>
+  <td>\include LeastSquaresNormalEquations.cpp </td>
+  <td>\verbinclude LeastSquaresNormalEquations.out </td>
+</tr>
+</table>
+
+If the matrix \a A is ill-conditioned, then this is not a good method, because the condition number
+of <i>A</i><sup>T</sup><i>A</i> is the square of the condition number of \a A. This means that you
+lose twice as many digits using normal equation than if you use the other methods.
+
+*/
+
+}
\ No newline at end of file
diff --git a/doc/Manual.dox b/doc/Manual.dox
index 3367982ca..0d3787675 100644
--- a/doc/Manual.dox
+++ b/doc/Manual.dox
@@ -96,6 +96,8 @@ namespace Eigen {
     \ingroup DenseLinearSolvers_chapter */
 /** \addtogroup TopicLinearAlgebraDecompositions
     \ingroup DenseLinearSolvers_chapter */
+/** \addtogroup LeastSquares 
+    \ingroup DenseLinearSolvers_chapter */
 
 /** \addtogroup DenseLinearSolvers_Reference
     \ingroup DenseLinearSolvers_chapter */
diff --git a/doc/TutorialLinearAlgebra.dox b/doc/TutorialLinearAlgebra.dox
index b09f3543e..e6c41fd70 100644
--- a/doc/TutorialLinearAlgebra.dox
+++ b/doc/TutorialLinearAlgebra.dox
@@ -167,8 +167,8 @@ Here is an example:
 
 \section TutorialLinAlgLeastsquares Least squares solving
 
-The best way to do least squares solving is with a SVD decomposition. Eigen provides one as the JacobiSVD class, and its solve()
-is doing least-squares solving.
+The most accurate method to do least squares solving is with a SVD decomposition. Eigen provides one
+as the JacobiSVD class, and its solve() is doing least-squares solving.
 
 Here is an example:
 <table class="example">
@@ -179,9 +179,10 @@ Here is an example:
 </tr>
 </table>
 
-Another way, potentially faster but less reliable, is to use a LDLT decomposition
-of the normal matrix. In any case, just read any reference text on least squares, and it will be very easy for you
-to implement any linear least squares computation on top of Eigen.
+Another methods, potentially faster but less reliable, are to use a Cholesky decomposition of the
+normal matrix or a QR decomposition. Our page on \link LeastSquares least squares solving \endlink
+has more details.
+
 
 \section TutorialLinAlgSeparateComputation Separating the computation from the construction
 
diff --git a/doc/snippets/LeastSquaresNormalEquations.cpp b/doc/snippets/LeastSquaresNormalEquations.cpp
new file mode 100644
index 000000000..997cf1715
--- /dev/null
+++ b/doc/snippets/LeastSquaresNormalEquations.cpp
@@ -0,0 +1,4 @@
+MatrixXf A = MatrixXf::Random(3, 2);
+VectorXf b = VectorXf::Random(3);
+cout << "The solution using normal equations is:\n"
+     << (A.transpose() * A).ldlt().solve(A.transpose() * b) << endl;
diff --git a/doc/snippets/LeastSquaresQR.cpp b/doc/snippets/LeastSquaresQR.cpp
new file mode 100644
index 000000000..6c9704547
--- /dev/null
+++ b/doc/snippets/LeastSquaresQR.cpp
@@ -0,0 +1,4 @@
+MatrixXf A = MatrixXf::Random(3, 2);
+VectorXf b = VectorXf::Random(3);
+cout << "The solution using the QR decomposition is:\n"
+     << A.colPivHouseholderQr().solve(b) << endl;

From 66f1c56aabc4ec4789405d11f544806312d49cd6 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sun, 19 Jan 2014 03:04:51 +0100
Subject: [PATCH 17/48] sparse_solve_retval_base::defaultEvalTo created
 extremely oversized temporary matrices in some cases

---
 Eigen/src/misc/SparseSolve.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/misc/SparseSolve.h b/Eigen/src/misc/SparseSolve.h
index 244bb8ec7..05caa9266 100644
--- a/Eigen/src/misc/SparseSolve.h
+++ b/Eigen/src/misc/SparseSolve.h
@@ -54,8 +54,10 @@ template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_b
       static const int NbColsAtOnce = 4;
       int rhsCols = m_rhs.cols();
       int size = m_rhs.rows();
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,rhsCols);
+      // the temporary matrices do not need more columns than NbColsAtOnce:
+      int tmpCols = (std::min)(rhsCols, NbColsAtOnce); 
+      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols);
+      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols);
       for(int k=0; k<rhsCols; k+=NbColsAtOnce)
       {
         int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);

From deab937d4526c4542d13aa2654e8d898aab9a28b Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 24 Jan 2014 12:50:29 +0100
Subject: [PATCH 18/48] NVCC: fix closed-form eigenvalue decomposition,
 workaround gcc4.7/nvcc5.5 issue

---
 Eigen/src/Core/Diagonal.h                     |  7 ++++-
 .../src/Eigenvalues/SelfAdjointEigenSolver.h  |  6 ++--
 test/cuda_basic.cu                            | 31 ++++++++++++++++---
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index 4436c6a69..b160479ab 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -77,7 +77,12 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
 
     EIGEN_DEVICE_FUNC
     inline Index rows() const
-    { return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
+    {
+      EIGEN_USING_STD_MATH(min);
+      return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
+                               : (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
+      
+    }
 
     EIGEN_DEVICE_FUNC
     inline Index cols() const { return 1; }
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index 0c3425069..b8146d04d 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -756,7 +756,9 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
   EIGEN_DEVICE_FUNC
   static inline void run(SolverType& solver, const MatrixType& mat, int options)
   {
-    using std::sqrt;
+    EIGEN_USING_STD_MATH(max)
+    EIGEN_USING_STD_MATH(sqrt);
+    
     eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());
     eigen_assert((options&~(EigVecMask|GenEigMask))==0
             && (options&EigVecMask)!=EigVecMask
@@ -768,7 +770,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
   
     // map the matrix coefficients to [-1:1] to avoid over- and underflow.
     Scalar scale = mat.cwiseAbs().maxCoeff();
-    scale = (std::max)(scale,Scalar(1));
+    scale = (max)(scale,Scalar(1));
     MatrixType scaledMat = mat / scale;
     
     // Compute the eigenvalues
diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu
index a062947a8..aa7f7a599 100644
--- a/test/cuda_basic.cu
+++ b/test/cuda_basic.cu
@@ -1,8 +1,16 @@
 
 
+// workaround issue between gcc >= 4.7 and cuda 5.5
+#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7)
+  #undef _GLIBCXX_ATOMIC_BUILTINS
+  #undef _GLIBCXX_USE_INT128
+#endif
+
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cuda_basic
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+
 #include "main.h"
 #include "cuda_common.h"
 
@@ -70,6 +78,17 @@ struct prod {
   }
 };
 
+template<typename T1, typename T2>
+struct diagonal {
+  EIGEN_DEVICE_FUNC
+  void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
+  {
+    using namespace Eigen;
+    T1 x1(in+i);
+    Map<T2> res(out+i*T2::MaxSizeAtCompileTime);
+    res += x1.diagonal();
+  }
+};
 
 template<typename T>
 struct eigenvalues {
@@ -82,12 +101,11 @@ struct eigenvalues {
     Map<Vec> res(out+i*Vec::MaxSizeAtCompileTime);
     T A = M*M.adjoint();
     SelfAdjointEigenSolver<T> eig;
-    eig.computeDirect(A);
-    res = A.eigenvalues();
+    eig.computeDirect(M);
+    res = eig.eigenvalues();
   }
 };
 
-
 void test_cuda_basic()
 {
   ei_test_init_cuda();
@@ -110,7 +128,10 @@ void test_cuda_basic()
   CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix3f,Matrix3f>(), nthreads, in, out) );
   CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix4f,Vector4f>(), nthreads, in, out) );
   
-//   CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
-//   CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );
+  CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) );
+  CALL_SUBTEST( run_and_compare_to_c<uda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) );
+  
+  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
+  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );
 
 }

From 40c42d9788a66ac355c4206612e386229f7fc581 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 24 Jan 2014 12:51:05 +0100
Subject: [PATCH 19/48] NVCC: no need to enforce host compiler

---
 test/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3c32f4b21..e1bff179d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -302,7 +302,6 @@ find_package(CUDA)
 if(CUDA_FOUND)
   
   set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-  set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
   cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR})
   set(EIGEN_ADD_TEST_FILENAME_EXTENSION  "cu")
   

From afcfb560a2b92ad8b73f03fff80e691050fd5adc Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 24 Jan 2014 12:51:33 +0100
Subject: [PATCH 20/48] NVCC: add more debug info

---
 test/cuda_common.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/cuda_common.h b/test/cuda_common.h
index 0dd870b9c..9737693ac 100644
--- a/test/cuda_common.h
+++ b/test/cuda_common.h
@@ -86,12 +86,15 @@ void ei_test_init_cuda()
   cudaDeviceProp deviceProp;
   cudaGetDeviceProperties(&deviceProp, device);
   std::cout << "CUDA device info:\n";
+  std::cout << "  name:                        " << deviceProp.name << "\n";
   std::cout << "  capability:                  " << deviceProp.major << "." << deviceProp.minor << "\n";
   std::cout << "  multiProcessorCount:         " << deviceProp.multiProcessorCount << "\n";
   std::cout << "  maxThreadsPerMultiProcessor: " << deviceProp.maxThreadsPerMultiProcessor << "\n";
+  std::cout << "  warpSize:                    " << deviceProp.warpSize << "\n";
   std::cout << "  regsPerBlock:                " << deviceProp.regsPerBlock << "\n";
   std::cout << "  concurrentKernels:           " << deviceProp.concurrentKernels << "\n";
   std::cout << "  clockRate:                   " << deviceProp.clockRate << "\n";
+  std::cout << "  canMapHostMemory:            " << deviceProp.canMapHostMemory << "\n";
   std::cout << "  computeMode:                 " << deviceProp.computeMode << "\n";
 }
 

From 6cf938df53b5f26690a0e8034e10d11640d3a5c2 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 24 Jan 2014 13:24:30 +0100
Subject: [PATCH 21/48] Add a minimalistic page on CUDA with Eigen.

---
 doc/Manual.dox    |  1 +
 doc/UsingNVCC.dox | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 doc/UsingNVCC.dox

diff --git a/doc/Manual.dox b/doc/Manual.dox
index 0d3787675..0cbe792d5 100644
--- a/doc/Manual.dox
+++ b/doc/Manual.dox
@@ -11,6 +11,7 @@ namespace Eigen {
   - \subpage TopicCustomizingEigen
   - \subpage TopicMultiThreading
   - \subpage TopicUsingIntelMKL
+  - \subpage TopicCUDA
   - \subpage TopicTemplateKeyword
   - \subpage UserManual_UnderstandingEigen
 */
diff --git a/doc/UsingNVCC.dox b/doc/UsingNVCC.dox
new file mode 100644
index 000000000..e9df5de04
--- /dev/null
+++ b/doc/UsingNVCC.dox
@@ -0,0 +1,32 @@
+
+namespace Eigen {
+
+/** \page TopicCUDA Using Eigen in CUDA kernels
+
+\b Disclaimer: this page is about an \b experimental feature in %Eigen.
+
+Staring from CUDA 5.0, the CUDA compiler, \c nvcc, is able to properly parse %Eigen's code (almost).
+A few adaptations of the %Eigen's code already allows to use some parts of %Eigen in your own CUDA kernels.
+To this end you need the devel branch of %Eigen, CUDA 5.0 or greater with GCC.
+
+Known issues:
+
+ - \c nvcc with MS Visual Studio does not work (patch welcome)
+ 
+ - \c nvcc with \c clang does not work (patch welcome)
+ 
+ - \c nvcc 5.5 with gcc-4.7 (or greater) has issues with the standard \c <limits> header file. To workaround this, you can add the following before including any other files:
+   \code
+    // workaround issue between gcc >= 4.7 and cuda 5.5
+    #if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7)
+      #undef _GLIBCXX_ATOMIC_BUILTINS
+      #undef _GLIBCXX_USE_INT128
+    #endif
+   \endcode
+   
+ - On 64bits system Eigen uses \c long \c int as the default type for indexes and sizes. On CUDA device, it would make sense to default to 32 bits \c int.
+   However, to keep host and CUDA code compatible, this cannot be done automatically by %Eigen, and the user is thus required to define \c EIGEN_DEFAULT_DENSE_INDEX_TYPE to \c int throughout his code (or only for CUDA code if there is no interaction between host and CUDA code through %Eigen's object).
+
+*/
+
+}

From a7621809fe0fc7c65446ab9a83739ebe313004d4 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sat, 25 Jan 2014 16:54:13 +0100
Subject: [PATCH 22/48] Remove useless register keyword, and optimize
 predux_min/max for SSE4

---
 Eigen/src/Core/arch/SSE/PacketMath.h           | 18 ++++++++++++++----
 .../Core/products/SelfadjointMatrixVector.h    |  6 +++---
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f85d2e06e..f5a3dab52 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -504,13 +504,18 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
 }
 template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
 {
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
   // after some experiments, it is seems this is the fastest way to implement it
   // for GCC (eg., it does not like using std::min after the pstore !!)
   EIGEN_ALIGN16 int aux[4];
   pstore(aux, a);
-  register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
-  register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
   return aux0<aux2 ? aux0 : aux2;
+#endif // EIGEN_VECTORIZE_SSE4_1
 }
 
 // max
@@ -525,13 +530,18 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
 }
 template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
 {
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
   // after some experiments, it is seems this is the fastest way to implement it
   // for GCC (eg., it does not like using std::min after the pstore !!)
   EIGEN_ALIGN16 int aux[4];
   pstore(aux, a);
-  register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
-  register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
   return aux0>aux2 ? aux0 : aux2;
+#endif // EIGEN_VECTORIZE_SSE4_1
 }
 
 #if (defined __GNUC__)
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index c40e80f53..f698f67f9 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -79,8 +79,8 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
   for (Index j=FirstTriangular ? bound : 0;
        j<(FirstTriangular ? size : bound);j+=2)
   {
-    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
-    register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
+    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+    const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
 
     Scalar t0 = cjAlpha * rhs[j];
     Packet ptmp0 = pset1<Packet>(t0);
@@ -147,7 +147,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
   }
   for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
   {
-    register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
 
     Scalar t1 = cjAlpha * rhs[j];
     Scalar t2(0);

From febfc7b9b43866c44d49fe0e2b8ebcfc7b8b10be Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Wed, 29 Jan 2014 22:05:39 +0100
Subject: [PATCH 23/48] Fix bug #730: Path of OpenGL headers is different on
 MacOS

---
 unsupported/Eigen/OpenGLSupport | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/unsupported/Eigen/OpenGLSupport b/unsupported/Eigen/OpenGLSupport
index 4454bf820..c4090ab11 100644
--- a/unsupported/Eigen/OpenGLSupport
+++ b/unsupported/Eigen/OpenGLSupport
@@ -11,7 +11,12 @@
 #define EIGEN_OPENGL_MODULE
 
 #include <Eigen/Geometry>
-#include <GL/gl.h>
+
+#if defined(__APPLE_CC__)
+  #include <OpenGL/gl.h>
+#else
+  #include <GL/gl.h>
+#endif
 
 namespace Eigen {
 

From e722f36ffa80f131e19428bab275dbc2df522093 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sat, 1 Feb 2014 20:49:48 +0100
Subject: [PATCH 24/48] =?UTF-8?q?Fixed=20issue=20#734=20(thanks=20to=20Phi?=
 =?UTF-8?q?lipp=20B=C3=BCttgenbach=20for=20reporting=20the=20issue=20and?=
 =?UTF-8?q?=20proposing=20a=20fix).=20Kept=20ColMajor=20layout=20if=20poss?=
 =?UTF-8?q?ible=20in=20order=20to=20keep=20derivatives=20of=20the=20same?=
 =?UTF-8?q?=20order=20adjacent=20in=20memory.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 unsupported/Eigen/src/Splines/SplineFwd.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/unsupported/Eigen/src/Splines/SplineFwd.h b/unsupported/Eigen/src/Splines/SplineFwd.h
index 49db8d35d..5d9bfacf8 100644
--- a/unsupported/Eigen/src/Splines/SplineFwd.h
+++ b/unsupported/Eigen/src/Splines/SplineFwd.h
@@ -31,6 +31,8 @@ namespace Eigen
 
       enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. */ };
       enum { NumOfDerivativesAtCompileTime = OrderAtCompileTime /*!< The number of derivatives defined for the current spline. */ };
+      
+      enum { DerivativeMemoryLayout = Dimension==1 ? RowMajor : ColMajor /*!< The derivative type's memory layout. */ };
 
       /** \brief The data type used to store non-zero basis functions. */
       typedef Array<Scalar,1,OrderAtCompileTime> BasisVectorType;
@@ -39,7 +41,7 @@ namespace Eigen
       typedef Array<Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType;
       
       /** \brief The data type used to store the spline's derivative values. */
-      typedef Array<Scalar,Dimension,Dynamic,ColMajor,Dimension,NumOfDerivativesAtCompileTime> DerivativeType;
+      typedef Array<Scalar,Dimension,Dynamic,DerivativeMemoryLayout,Dimension,NumOfDerivativesAtCompileTime> DerivativeType;
 
       /** \brief The point type the spline is representing. */
       typedef Array<Scalar,Dimension,1> PointType;
@@ -67,7 +69,7 @@ namespace Eigen
       typedef Array<_Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType;
       
       /** \brief The data type used to store the spline's derivative values. */      
-      typedef Array<_Scalar,_Dim,Dynamic,ColMajor,_Dim,NumOfDerivativesAtCompileTime> DerivativeType;
+      typedef Array<_Scalar,_Dim,Dynamic,DerivativeMemoryLayout,_Dim,NumOfDerivativesAtCompileTime> DerivativeType;
     };
 
     /** \brief 2D float B-spline with dynamic degree. */

From 6c527bd811a4bbb122093fcc9ba9b7bd86963855 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Tue, 4 Feb 2014 07:02:34 +0100
Subject: [PATCH 25/48] Fixed assignment from QMatrix to Transform for compact
 storage.

---
 Eigen/src/Geometry/Transform.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 419f90148..020868103 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -699,9 +699,13 @@ template<typename Scalar, int Dim, int Mode,int Options>
 Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other)
 {
   EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
-  m_matrix << other.m11(), other.m21(), other.dx(),
-              other.m12(), other.m22(), other.dy(),
-              0, 0, 1;
+  if (Mode == int(AffineCompact))
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy();
+  else
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy(),
+                0, 0, 1;
   return *this;
 }
 

From ff8d81762d6612b812db6385b1096c6a8b1006cc Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Thu, 6 Feb 2014 11:06:06 +0000
Subject: [PATCH 26/48] Fix bug #736: LDLT isPositive returns false for a
 positive semidefinite matrix Add unit test covering this case.

---
 Eigen/src/Cholesky/LDLT.h | 52 +++++++++++++++++++++++----------------
 test/cholesky.cpp         | 24 +++++++++++++++---
 2 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 4faedd257..f34d26465 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -16,7 +16,10 @@
 namespace Eigen { 
 
 namespace internal {
-template<typename MatrixType, int UpLo> struct LDLT_Traits;
+  template<typename MatrixType, int UpLo> struct LDLT_Traits;
+
+  // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef
+  enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };
 }
 
 /** \ingroup Cholesky_Module
@@ -69,7 +72,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
       * The default constructor is useful in cases in which the user intends to
       * perform decompositions via LDLT::compute(const MatrixType&).
       */
-    LDLT() : m_matrix(), m_transpositions(), m_isInitialized(false) {}
+    LDLT() 
+      : m_matrix(), 
+        m_transpositions(), 
+        m_sign(internal::ZeroSign),
+        m_isInitialized(false) 
+    {}
 
     /** \brief Default Constructor with memory preallocation
       *
@@ -81,6 +89,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
       : m_matrix(size, size),
         m_transpositions(size),
         m_temporary(size),
+        m_sign(internal::ZeroSign),
         m_isInitialized(false)
     {}
 
@@ -93,6 +102,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
       : m_matrix(matrix.rows(), matrix.cols()),
         m_transpositions(matrix.rows()),
         m_temporary(matrix.rows()),
+        m_sign(internal::ZeroSign),
         m_isInitialized(false)
     {
       compute(matrix);
@@ -139,7 +149,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     inline bool isPositive() const
     {
       eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == 1;
+      return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
     }
     
     #ifdef EIGEN2_SUPPORT
@@ -153,7 +163,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     inline bool isNegative(void) const
     {
       eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == -1;
+      return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;
     }
 
     /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
@@ -235,7 +245,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     MatrixType m_matrix;
     TranspositionType m_transpositions;
     TmpMatrixType m_temporary;
-    int m_sign;
+    internal::SignMatrix m_sign;
     bool m_isInitialized;
 };
 
@@ -246,7 +256,7 @@ template<int UpLo> struct ldlt_inplace;
 template<> struct ldlt_inplace<Lower>
 {
   template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
+  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
   {
     using std::abs;
     typedef typename MatrixType::Scalar Scalar;
@@ -258,8 +268,9 @@ template<> struct ldlt_inplace<Lower>
     if (size <= 1)
     {
       transpositions.setIdentity();
-      if(sign)
-        *sign = numext::real(mat.coeff(0,0))>0 ? 1:-1;
+      if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
+      else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
+      else sign = ZeroSign;
       return true;
     }
 
@@ -284,7 +295,6 @@ template<> struct ldlt_inplace<Lower>
       if(biggest_in_corner < cutoff)
       {
         for(Index i = k; i < size; i++) transpositions.coeffRef(i) = i;
-        if(sign) *sign = 0;
         break;
       }
 
@@ -325,15 +335,15 @@ template<> struct ldlt_inplace<Lower>
       }
       if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff))
         A21 /= mat.coeffRef(k,k);
-      
-      if(sign)
-      {
-        // LDLT is not guaranteed to work for indefinite matrices, but let's try to get the sign right
-        int newSign = numext::real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0;
-        if(k == 0)
-          *sign = newSign;
-        else if(*sign != newSign)
-          *sign = 0;
+
+      RealScalar realAkk = numext::real(mat.coeffRef(k,k));
+      if (sign == PositiveSemiDef) {
+        if (realAkk < 0) sign = Indefinite;
+      } else if (sign == NegativeSemiDef) {
+        if (realAkk > 0) sign = Indefinite;
+      } else if (sign == ZeroSign) {
+        if (realAkk > 0) sign = PositiveSemiDef;
+        else if (realAkk < 0) sign = NegativeSemiDef;
       }
     }
 
@@ -399,7 +409,7 @@ template<> struct ldlt_inplace<Lower>
 template<> struct ldlt_inplace<Upper>
 {
   template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
+  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
   {
     Transpose<MatrixType> matt(mat);
     return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
@@ -445,7 +455,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
   m_isInitialized = false;
   m_temporary.resize(size);
 
-  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);
+  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
 
   m_isInitialized = true;
   return *this;
@@ -473,7 +483,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
     for (Index i = 0; i < size; i++)
       m_transpositions.coeffRef(i) = i;
     m_temporary.resize(size);
-    m_sign = sigma>=0 ? 1 : -1;
+    m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
     m_isInitialized = true;
   }
 
diff --git a/test/cholesky.cpp b/test/cholesky.cpp
index 378525a83..b980dc572 100644
--- a/test/cholesky.cpp
+++ b/test/cholesky.cpp
@@ -263,8 +263,8 @@ template<typename MatrixType> void cholesky_bug241(const MatrixType& m)
 
 // LDLT is not guaranteed to work for indefinite matrices, but happens to work fine if matrix is diagonal.
 // This test checks that LDLT reports correctly that matrix is indefinite. 
-// See http://forum.kde.org/viewtopic.php?f=74&t=106942
-template<typename MatrixType> void cholesky_indefinite(const MatrixType& m)
+// See http://forum.kde.org/viewtopic.php?f=74&t=106942 and bug 736
+template<typename MatrixType> void cholesky_definiteness(const MatrixType& m)
 {
   eigen_assert(m.rows() == 2 && m.cols() == 2);
   MatrixType mat;
@@ -280,6 +280,24 @@ template<typename MatrixType> void cholesky_indefinite(const MatrixType& m)
     VERIFY(!ldlt.isNegative());
     VERIFY(!ldlt.isPositive());
   }
+  {
+    mat << 0, 0, 0, 0;
+    LDLT<MatrixType> ldlt(mat);
+    VERIFY(ldlt.isNegative());
+    VERIFY(ldlt.isPositive());
+  }
+  {
+    mat << 0, 0, 0, 1;
+    LDLT<MatrixType> ldlt(mat);
+    VERIFY(!ldlt.isNegative());
+    VERIFY(ldlt.isPositive());
+  }
+  {
+    mat << -1, 0, 0, 0;
+    LDLT<MatrixType> ldlt(mat);
+    VERIFY(ldlt.isNegative());
+    VERIFY(!ldlt.isPositive());
+  }
 }
 
 template<typename MatrixType> void cholesky_verify_assert()
@@ -309,7 +327,7 @@ void test_cholesky()
     CALL_SUBTEST_1( cholesky(Matrix<double,1,1>()) );
     CALL_SUBTEST_3( cholesky(Matrix2d()) );
     CALL_SUBTEST_3( cholesky_bug241(Matrix2d()) );
-    CALL_SUBTEST_3( cholesky_indefinite(Matrix2d()) );
+    CALL_SUBTEST_3( cholesky_definiteness(Matrix2d()) );
     CALL_SUBTEST_4( cholesky(Matrix3f()) );
     CALL_SUBTEST_5( cholesky(Matrix4d()) );
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);

From 9e71ecbeec4df51b4f25985a6fabe5a06604a0d0 Mon Sep 17 00:00:00 2001
From: Naumov Maks <maksqwe1@ukr.net>
Date: Sat, 8 Feb 2014 10:40:51 +0000
Subject: [PATCH 27/48] fix typo in evalSolverSugarFunction()

---
 unsupported/test/polynomialsolver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsupported/test/polynomialsolver.cpp b/unsupported/test/polynomialsolver.cpp
index 13f92169e..3c9265d98 100644
--- a/unsupported/test/polynomialsolver.cpp
+++ b/unsupported/test/polynomialsolver.cpp
@@ -120,7 +120,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
       bool found = false;
       for( size_t j=0; j<calc_realRoots.size()&& !found; ++j )
       {
-        if( internal::isApprox( calc_realRoots[i], real_roots[j] ), psPrec ){
+        if( internal::isApprox( calc_realRoots[i], real_roots[j], psPrec ) ){
           found = true; }
       }
       VERIFY( found );

From c1921ad3e22872ef6c2ea2d88cdf00b887324604 Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Sat, 8 Feb 2014 20:31:35 +0000
Subject: [PATCH 28/48] Remove unused typedef in polynomialsolver test.

---
 unsupported/test/polynomialsolver.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unsupported/test/polynomialsolver.cpp b/unsupported/test/polynomialsolver.cpp
index 3c9265d98..680ff6d31 100644
--- a/unsupported/test/polynomialsolver.cpp
+++ b/unsupported/test/polynomialsolver.cpp
@@ -106,7 +106,6 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
 
     typedef typename POLYNOMIAL::Scalar                 Scalar;
     typedef typename REAL_ROOTS::Scalar                 Real;
-    typedef PolynomialSolver<Scalar, Deg >              PolynomialSolverType;
 
     //Test realRoots
     std::vector< Real > calc_realRoots;

From e170e7070babbdb164cbda9289419e264d3c2645 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Tue, 11 Feb 2014 17:27:14 +0100
Subject: [PATCH 29/48] Added examples for casting, made better examples for
 Maps

---
 doc/AsciiQuickReference.txt | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/doc/AsciiQuickReference.txt b/doc/AsciiQuickReference.txt
index d3bfd439c..4c8fe2f47 100644
--- a/doc/AsciiQuickReference.txt
+++ b/doc/AsciiQuickReference.txt
@@ -163,13 +163,25 @@ x.squaredNorm()           // dot(x, x)   Note the equivalence is not true for co
 x.dot(y)                  // dot(x, y)
 x.cross(y)                // cross(x, y) Requires #include <Eigen/Geometry>
 
+//// Type conversion
+// Eigen                           // Matlab
+A.cast<double>();                  // double(A)
+A.cast<float>();                   // single(A)
+A.cast<int>();                     // int32(A)
+// if the original type equals destination type, no work is done
+
+// Note that for most operations Eigen requires all operands to have the same type:
+MatrixXf F = MatrixXf::Zero(3,3);
+A += F;                // illegal in Eigen. In Matlab A = A+F is allowed
+A += F.cast<double>(); // F converted to double and then added (generally, conversion happens on-the-fly)
+
 // Eigen can map existing memory into Eigen matrices.
 float array[3];
-Map<Vector3f>(array, 3).fill(10);
-int data[4] = 1, 2, 3, 4;
-Matrix2i mat2x2(data);
-MatrixXi mat2x2 = Map<Matrix2i>(data);
-MatrixXi mat2x2 = Map<MatrixXi>(data, 2, 2);
+Vector3f::Map(array).fill(10);            // create a temporary Map over array and sets entries to 10
+int data[4] = {1, 2, 3, 4};
+Matrix2i mat2x2(data);                    // copies data into mat2x2
+Matrix2i::Map(data) = 2*mat2x2;           // overwrite elements of data with 2*mat2x2
+MatrixXi::Map(data, 2, 2) += mat2x2;      // adds mat2x2 to elements of data (alternative syntax if size is not know at compile time)
 
 // Solve Ax = b. Result stored in x. Matlab: x = A \ b.
 x = A.ldlt().solve(b));  // A sym. p.s.d.    #include <Eigen/Cholesky>

From 31c63ef0b42ae9d6249281c70a0268ddb4eba10d Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Wed, 12 Feb 2014 13:37:23 +0100
Subject: [PATCH 30/48] fix compilation of Transform * UniformScaling

---
 Eigen/src/Geometry/Transform.h | 4 ++--
 test/geo_transformations.cpp   | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 020868103..0a5012cfe 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -530,9 +530,9 @@ public:
 
   inline Transform& operator=(const UniformScaling<Scalar>& t);
   inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
-  inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Isometry)> operator*(const UniformScaling<Scalar>& s) const
+  inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode)> operator*(const UniformScaling<Scalar>& s) const
   {
-    Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Isometry),Options> res = *this;
+    Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode),Options> res = *this;
     res.scale(s.factor());
     return res;
   }
diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp
index 35ae67ebe..ee3030b5d 100644
--- a/test/geo_transformations.cpp
+++ b/test/geo_transformations.cpp
@@ -279,6 +279,13 @@ template<typename Scalar, int Mode, int Options> void transformations()
   t1 = Eigen::Scaling(s0,s0,s0) * t1;
   VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
 
+  t0 = t3;
+  t0.scale(s0);
+  t1 = t3 * Eigen::Scaling(s0);
+  VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
+  t0.prescale(s0);
+  t1 = Eigen::Scaling(s0) * t1;
+  VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
 
   t0.setIdentity();
   t0.prerotate(q1).prescale(v0).pretranslate(v0);

From 7ea6ef89693b0d70db29ccad1a3cc7771195e318 Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Wed, 12 Feb 2014 14:03:39 +0000
Subject: [PATCH 31/48] Fix documentation of MatrixBase::applyOnTheLeft (bug
 #739) Add examples; move methods from EigenBase.h to MatrixBase.h

---
 Eigen/src/Core/EigenBase.h                  | 30 --------------
 Eigen/src/Core/MatrixBase.h                 | 45 +++++++++++++++++++++
 doc/snippets/MatrixBase_applyOnTheLeft.cpp  |  7 ++++
 doc/snippets/MatrixBase_applyOnTheRight.cpp |  9 +++++
 4 files changed, 61 insertions(+), 30 deletions(-)
 create mode 100644 doc/snippets/MatrixBase_applyOnTheLeft.cpp
 create mode 100644 doc/snippets/MatrixBase_applyOnTheRight.cpp

diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index a25e823ab..1a577c2dc 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -141,36 +141,6 @@ Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
   return derived();
 }
 
-/** replaces \c *this by \c *this * \a other.
-  *
-  * \returns a reference to \c *this
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline Derived&
-MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheRight(derived());
-  return derived();
-}
-
-/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
-  */
-template<typename Derived>
-template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheRight(derived());
-}
-
-/** replaces \c *this by \c *this * \a other. */
-template<typename Derived>
-template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
-{
-  other.derived().applyThisOnTheLeft(derived());
-}
-
 } // end namespace Eigen
 
 #endif // EIGEN_EIGENBASE_H
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 61798317e..49b69f873 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -564,6 +564,51 @@ template<typename Derived> class MatrixBase
     {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
 };
 
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** replaces \c *this by \c *this * \a other.
+  *
+  * \returns a reference to \c *this
+  *
+  * Example: \include MatrixBase_applyOnTheRight.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheRight.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived&
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+  return derived();
+}
+
+/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
+  *
+  * Example: \include MatrixBase_applyOnTheRight.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheRight.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+}
+
+/** replaces \c *this by \a other * \c *this.
+  *
+  * Example: \include MatrixBase_applyOnTheLeft.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheLeft.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheLeft(derived());
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_MATRIXBASE_H
diff --git a/doc/snippets/MatrixBase_applyOnTheLeft.cpp b/doc/snippets/MatrixBase_applyOnTheLeft.cpp
new file mode 100644
index 000000000..6398c873a
--- /dev/null
+++ b/doc/snippets/MatrixBase_applyOnTheLeft.cpp
@@ -0,0 +1,7 @@
+Matrix3f A = Matrix3f::Random(3,3), B;
+B << 0,1,0,  
+     0,0,1,  
+     1,0,0;
+cout << "At start, A = " << endl << A << endl;
+A.applyOnTheLeft(B); 
+cout << "After applyOnTheLeft, A = " << endl << A << endl;
diff --git a/doc/snippets/MatrixBase_applyOnTheRight.cpp b/doc/snippets/MatrixBase_applyOnTheRight.cpp
new file mode 100644
index 000000000..e4b71b2d8
--- /dev/null
+++ b/doc/snippets/MatrixBase_applyOnTheRight.cpp
@@ -0,0 +1,9 @@
+Matrix3f A = Matrix3f::Random(3,3), B;
+B << 0,1,0,  
+     0,0,1,  
+     1,0,0;
+cout << "At start, A = " << endl << A << endl;
+A *= B;
+cout << "After A *= B, A = " << endl << A << endl;
+A.applyOnTheRight(B);  // equivalent to A *= B
+cout << "After applyOnTheRight, A = " << endl << A << endl;

From 14422decc2583e556352408983f4f8fda3054c70 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 13 Feb 2014 09:21:13 +0100
Subject: [PATCH 32/48] Fix Fortran compiler detection

---
 blas/CMakeLists.txt          | 3 +++
 cmake/language_support.cmake | 2 +-
 lapack/CMakeLists.txt        | 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt
index c35a2fdbe..a9bc05137 100644
--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt
@@ -7,6 +7,9 @@ workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS)
 
 if(EIGEN_Fortran_COMPILER_WORKS)
   enable_language(Fortran OPTIONAL)
+  if(NOT CMAKE_Fortran_COMPILER)
+    set(EIGEN_Fortran_COMPILER_WORKS OFF)
+  endif()
 endif()
 
 add_custom_target(blas)
diff --git a/cmake/language_support.cmake b/cmake/language_support.cmake
index 2ca303c92..d687b71f6 100644
--- a/cmake/language_support.cmake
+++ b/cmake/language_support.cmake
@@ -23,7 +23,7 @@ function(workaround_9220 language language_works)
   #message("DEBUG: language = ${language}")
   set(text
     "project(test NONE)
-    cmake_minimum_required(VERSION 2.6.0)
+    cmake_minimum_required(VERSION 2.8.0)
     set (CMAKE_Fortran_FLAGS \"${CMAKE_Fortran_FLAGS}\")
     set (CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS}\")
     enable_language(${language} OPTIONAL)
diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt
index 7e7444326..9883d4c72 100644
--- a/lapack/CMakeLists.txt
+++ b/lapack/CMakeLists.txt
@@ -7,6 +7,9 @@ workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS)
 
 if(EIGEN_Fortran_COMPILER_WORKS)
   enable_language(Fortran OPTIONAL)
+  if(NOT CMAKE_Fortran_COMPILER)
+    set(EIGEN_Fortran_COMPILER_WORKS OFF)
+  endif()
 endif()
 
 add_custom_target(lapack)

From 32915806305081d837711305bcf57508714d0068 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 13 Feb 2014 15:44:01 +0100
Subject: [PATCH 33/48] Fix bug #740: overflow issue in stableNorm

---
 Eigen/src/Core/StableNorm.h | 27 ++++++++++++++++++++-------
 test/stable_norm.cpp        | 14 +++++++++++---
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index c219e2f53..525620bad 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -17,16 +17,29 @@ namespace internal {
 template<typename ExpressionType, typename Scalar>
 inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
 {
-  Scalar max = bl.cwiseAbs().maxCoeff();
-  if (max>scale)
+  using std::max;
+  Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
+  
+  if (maxCoeff>scale)
   {
-    ssq = ssq * numext::abs2(scale/max);
-    scale = max;
-    invScale = Scalar(1)/scale;
+    ssq = ssq * numext::abs2(scale/maxCoeff);
+    Scalar tmp = Scalar(1)/maxCoeff;
+    if(tmp > NumTraits<Scalar>::highest())
+    {
+      invScale = NumTraits<Scalar>::highest();
+      scale = Scalar(1)/invScale;
+    }
+    else
+    {
+      scale = maxCoeff;
+      invScale = tmp;
+    }
   }
-  // TODO if the max is much much smaller than the current scale,
+  
+  // TODO if the maxCoeff is much much smaller than the current scale,
   // then we can neglect this sub vector
-  ssq += (bl*invScale).squaredNorm();
+  if(scale>Scalar(0)) // if scale==0, then bl is 0 
+    ssq += (bl*invScale).squaredNorm();
 }
 
 template<typename Derived>
diff --git a/test/stable_norm.cpp b/test/stable_norm.cpp
index c09fc17b7..364170acd 100644
--- a/test/stable_norm.cpp
+++ b/test/stable_norm.cpp
@@ -55,8 +55,16 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
   Index rows = m.rows();
   Index cols = m.cols();
 
-  Scalar big = internal::random<Scalar>() * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
-  Scalar small = internal::random<Scalar>() * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
+  // get a non-zero random factor
+  Scalar factor = internal::random<Scalar>();
+  while(factor<RealScalar(1e-3))
+    factor = internal::random<Scalar>();
+  Scalar big = factor * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
+  
+  factor = internal::random<Scalar>();
+  while(factor<RealScalar(1e-3))
+    factor = internal::random<Scalar>();
+  Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
 
   MatrixType  vzero = MatrixType::Zero(rows, cols),
               vrand = MatrixType::Random(rows, cols),
@@ -91,7 +99,7 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
   VERIFY_IS_APPROX(vsmall.blueNorm(),   sqrt(size)*abs(small));
   VERIFY_IS_APPROX(vsmall.hypotNorm(),  sqrt(size)*abs(small));
 
-// Test compilation of cwise() version
+  // Test compilation of cwise() version
   VERIFY_IS_APPROX(vrand.colwise().stableNorm(),      vrand.colwise().norm());
   VERIFY_IS_APPROX(vrand.colwise().blueNorm(),        vrand.colwise().norm());
   VERIFY_IS_APPROX(vrand.colwise().hypotNorm(),       vrand.colwise().norm());

From 0715d49908e59487e040d49e03319b0a9093b969 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 13 Feb 2014 15:49:54 +0100
Subject: [PATCH 34/48] Fix stable_norm unit test for complexes

---
 test/stable_norm.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/stable_norm.cpp b/test/stable_norm.cpp
index 364170acd..549f91fbf 100644
--- a/test/stable_norm.cpp
+++ b/test/stable_norm.cpp
@@ -57,12 +57,12 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
 
   // get a non-zero random factor
   Scalar factor = internal::random<Scalar>();
-  while(factor<RealScalar(1e-3))
+  while(numext::abs2(factor)<RealScalar(1e-4))
     factor = internal::random<Scalar>();
   Scalar big = factor * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
   
   factor = internal::random<Scalar>();
-  while(factor<RealScalar(1e-3))
+  while(numext::abs2(factor)<RealScalar(1e-4))
     factor = internal::random<Scalar>();
   Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
 

From c0e08e9e4b9307553ce1d5b321c6a1b2590ec56e Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 13 Feb 2014 15:53:51 +0100
Subject: [PATCH 35/48] fix stable norm benchmark

---
 bench/bench_norm.cpp | 84 +++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/bench/bench_norm.cpp b/bench/bench_norm.cpp
index 806db292c..398fef835 100644
--- a/bench/bench_norm.cpp
+++ b/bench/bench_norm.cpp
@@ -32,25 +32,25 @@ EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
   Scalar ssq = 1;
   for (int i=0;i<n;++i)
   {
-    Scalar ax = internal::abs(v.coeff(i));
+    Scalar ax = std::abs(v.coeff(i));
     if (scale >= ax)
     {
-      ssq += internal::abs2(ax/scale);
+      ssq += numext::abs2(ax/scale);
     }
     else
     {
-      ssq = Scalar(1) + ssq * internal::abs2(scale/ax);
+      ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
       scale = ax;
     }
   }
-  return scale * internal::sqrt(ssq);
+  return scale * std::sqrt(ssq);
 }
 
 template<typename T>
 EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
 {
   typedef typename T::Scalar Scalar;
-  Scalar s = v.cwise().abs().maxCoeff();
+  Scalar s = v.array().abs().maxCoeff();
   return s*(v/s).norm();
 }
 
@@ -73,16 +73,20 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
       v(i) = v(2*i) + v(2*i+1);
     n = n/2;
   }
-  return internal::sqrt(v(0));
+  return std::sqrt(v(0));
 }
 
+namespace Eigen {
+namespace internal {
 #ifdef EIGEN_VECTORIZE
-Packet4f internal::plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
-Packet2d internal::plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
+Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
+Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
 
-Packet4f internal::pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
-Packet2d internal::pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
+Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
+Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
 #endif
+}
+}
 
 template<typename T>
 EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
@@ -126,7 +130,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
 
     overfl  = rbig*s2m;          // overfow boundary for abig
     eps     = std::pow(ibeta, 1-it);
-    relerr  = internal::sqrt(eps);      // tolerance for neglecting asml
+    relerr  = std::sqrt(eps);      // tolerance for neglecting asml
     abig    = 1.0/eps - 1.0;
     if (Scalar(nbig)>abig)  nmax = abig;  // largest safe n
     else                    nmax = nbig;
@@ -134,13 +138,13 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
 
   typedef typename internal::packet_traits<Scalar>::type Packet;
   const int ps = internal::packet_traits<Scalar>::size;
-  Packet pasml = internal::pset1(Scalar(0));
-  Packet pamed = internal::pset1(Scalar(0));
-  Packet pabig = internal::pset1(Scalar(0));
-  Packet ps2m = internal::pset1(s2m);
-  Packet ps1m = internal::pset1(s1m);
-  Packet pb2  = internal::pset1(b2);
-  Packet pb1  = internal::pset1(b1);
+  Packet pasml = internal::pset1<Packet>(Scalar(0));
+  Packet pamed = internal::pset1<Packet>(Scalar(0));
+  Packet pabig = internal::pset1<Packet>(Scalar(0));
+  Packet ps2m = internal::pset1<Packet>(s2m);
+  Packet ps1m = internal::pset1<Packet>(s1m);
+  Packet pb2  = internal::pset1<Packet>(b2);
+  Packet pb1  = internal::pset1<Packet>(b1);
   for(int j=0; j<v.size(); j+=ps)
   {
     Packet ax = internal::pabs(v.template packet<Aligned>(j));
@@ -170,7 +174,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
   Scalar amed = internal::predux(pamed);
   if(abig > Scalar(0))
   {
-    abig = internal::sqrt(abig);
+    abig = std::sqrt(abig);
     if(abig > overfl)
     {
       eigen_assert(false && "overflow");
@@ -179,7 +183,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
     if(amed > Scalar(0))
     {
       abig = abig/s2m;
-      amed = internal::sqrt(amed);
+      amed = std::sqrt(amed);
     }
     else
     {
@@ -191,24 +195,24 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
   {
     if (amed > Scalar(0))
     {
-      abig = internal::sqrt(amed);
-      amed = internal::sqrt(asml) / s1m;
+      abig = std::sqrt(amed);
+      amed = std::sqrt(asml) / s1m;
     }
     else
     {
-      return internal::sqrt(asml)/s1m;
+      return std::sqrt(asml)/s1m;
     }
   }
   else
   {
-    return internal::sqrt(amed);
+    return std::sqrt(amed);
   }
   asml = std::min(abig, amed);
   abig = std::max(abig, amed);
   if(asml <= abig*relerr)
     return abig;
   else
-    return abig * internal::sqrt(Scalar(1) + internal::abs2(asml/abig));
+    return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
   #endif
 }
 
@@ -224,22 +228,22 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
     for (int i=0; i<iters; ++i) NRM(vd); \
     td.stop(); \
   } \
-  for (int k=0; k<std::max(1,tries/3); ++k) { \
+  /*for (int k=0; k<std::max(1,tries/3); ++k) { \
     tcf.start(); \
     for (int i=0; i<iters; ++i) NRM(vcf); \
     tcf.stop(); \
-  } \
+  } */\
   std::cout << #NRM << "\t" << tf.value() << "   " << td.value() <<  "    " << tcf.value() << "\n"; \
 }
 
 void check_accuracy(double basef, double based, int s)
 {
-  double yf = basef * internal::abs(internal::random<double>());
-  double yd = based * internal::abs(internal::random<double>());
+  double yf = basef * std::abs(internal::random<double>());
+  double yd = based * std::abs(internal::random<double>());
   VectorXf vf = VectorXf::Ones(s) * yf;
   VectorXd vd = VectorXd::Ones(s) * yd;
 
-  std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
+  std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
   std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
   std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
   std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
@@ -255,8 +259,8 @@ void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
   VectorXd vd(s);
   for (int i=0; i<s; ++i)
   {
-    vf[i] = internal::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
-    vd[i] = internal::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
+    vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
+    vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
   }
 
   //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
@@ -321,10 +325,10 @@ int main(int argc, char** argv)
     VectorXcf vcf = VectorXcf::Random(1024*1024*32) * y;
     BENCH_PERF(sqsumNorm);
     BENCH_PERF(blueNorm);
-//     BENCH_PERF(pblueNorm);
-//     BENCH_PERF(lapackNorm);
-//     BENCH_PERF(hypotNorm);
-//     BENCH_PERF(twopassNorm);
+    BENCH_PERF(pblueNorm);
+    BENCH_PERF(lapackNorm);
+    BENCH_PERF(hypotNorm);
+    BENCH_PERF(twopassNorm);
     BENCH_PERF(bl2passNorm);
   }
 
@@ -336,10 +340,10 @@ int main(int argc, char** argv)
     VectorXcf vcf = VectorXcf::Random(512) * y;
     BENCH_PERF(sqsumNorm);
     BENCH_PERF(blueNorm);
-//     BENCH_PERF(pblueNorm);
-//     BENCH_PERF(lapackNorm);
-//     BENCH_PERF(hypotNorm);
-//     BENCH_PERF(twopassNorm);
+    BENCH_PERF(pblueNorm);
+    BENCH_PERF(lapackNorm);
+    BENCH_PERF(hypotNorm);
+    BENCH_PERF(twopassNorm);
     BENCH_PERF(bl2passNorm);
   }
 }

From 0b1430ae10a5d6e865424d9fb9e0182bf60b3df8 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 13 Feb 2014 23:58:28 +0100
Subject: [PATCH 36/48] Fix propagation of index type

---
 .../ConservativeSparseSparseProduct.h         | 20 +++++++++----------
 Eigen/src/SparseCore/SparseMatrixBase.h       |  4 ++--
 Eigen/src/SparseCore/SparseProduct.h          |  5 +++--
 .../SparseSparseProductWithPruning.h          | 11 +++++-----
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 4b13f08d4..cd996d5ba 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -134,8 +134,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
 
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
     ColMajorMatrix resCol(lhs.rows(),rhs.cols());
     internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
     // sort the non zeros:
@@ -149,7 +149,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-     typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+     typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
      RowMajorMatrix rhsRow = rhs;
      RowMajorMatrix resRow(lhs.rows(), rhs.cols());
      internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
@@ -162,7 +162,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
     RowMajorMatrix lhsRow = lhs;
     RowMajorMatrix resRow(lhs.rows(), rhs.cols());
     internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
@@ -175,7 +175,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
     RowMajorMatrix resRow(lhs.rows(), rhs.cols());
     internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
     res = resRow;
@@ -190,7 +190,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
 
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
     ColMajorMatrix resCol(lhs.rows(), rhs.cols());
     internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
     res = resCol;
@@ -202,7 +202,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
     ColMajorMatrix lhsCol = lhs;
     ColMajorMatrix resCol(lhs.rows(), rhs.cols());
     internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
@@ -215,7 +215,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
     ColMajorMatrix rhsCol = rhs;
     ColMajorMatrix resCol(lhs.rows(), rhs.cols());
     internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
@@ -228,8 +228,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
 {
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
     RowMajorMatrix resRow(lhs.rows(),rhs.cols());
     internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
     // sort the non zeros:
diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h
index 706f699b8..bbcf7fb1c 100644
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -302,8 +302,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
         }
         else
         {
-          SparseMatrix<Scalar, RowMajorBit> trans = m;
-          s << static_cast<const SparseMatrixBase<SparseMatrix<Scalar, RowMajorBit> >&>(trans);
+          SparseMatrix<Scalar, RowMajorBit, Index> trans = m;
+          s << static_cast<const SparseMatrixBase<SparseMatrix<Scalar, RowMajorBit, Index> >&>(trans);
         }
       }
       return s;
diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h
index 70b6480ef..cf7663070 100644
--- a/Eigen/src/SparseCore/SparseProduct.h
+++ b/Eigen/src/SparseCore/SparseProduct.h
@@ -16,6 +16,7 @@ template<typename Lhs, typename Rhs>
 struct SparseSparseProductReturnType
 {
   typedef typename internal::traits<Lhs>::Scalar Scalar;
+  typedef typename internal::traits<Lhs>::Index Index;
   enum {
     LhsRowMajor = internal::traits<Lhs>::Flags & RowMajorBit,
     RhsRowMajor = internal::traits<Rhs>::Flags & RowMajorBit,
@@ -24,11 +25,11 @@ struct SparseSparseProductReturnType
   };
 
   typedef typename internal::conditional<TransposeLhs,
-    SparseMatrix<Scalar,0>,
+    SparseMatrix<Scalar,0,Index>,
     typename internal::nested<Lhs,Rhs::RowsAtCompileTime>::type>::type LhsNested;
 
   typedef typename internal::conditional<TransposeRhs,
-    SparseMatrix<Scalar,0>,
+    SparseMatrix<Scalar,0,Index>,
     typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type>::type RhsNested;
 
   typedef SparseSparseProduct<LhsNested, RhsNested> Type;
diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
index 70857c7b6..1dccdfe1b 100644
--- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
+++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
@@ -100,7 +100,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,C
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
   {
     // we need a col-major matrix to hold the result
-    typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> SparseTemporaryType;
     SparseTemporaryType _res(res.rows(), res.cols());
     internal::sparse_sparse_product_with_pruning_impl<Lhs,Rhs,SparseTemporaryType>(lhs, rhs, _res, tolerance);
     res = _res;
@@ -126,10 +126,11 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
   typedef typename ResultType::RealScalar RealScalar;
   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
   {
-    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
-    ColMajorMatrix colLhs(lhs);
-    ColMajorMatrix colRhs(rhs);
-    internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrix,ColMajorMatrix,ResultType>(colLhs, colRhs, res, tolerance);
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixLhs;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixRhs;
+    ColMajorMatrixLhs colLhs(lhs);
+    ColMajorMatrixRhs colRhs(rhs);
+    internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);
 
     // let's transpose the product to get a column x column product
 //     typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;

From 97965dde9bc079d042d3c4037e1927bee0df443f Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 14 Feb 2014 00:04:38 +0100
Subject: [PATCH 37/48] alloca is not necessarily alligned on windows

---
 Eigen/src/Core/util/Memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index d177e8b5a..1f30fba1c 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -608,7 +608,7 @@ template<typename T> class aligned_stack_memory_handler
   */
 #ifdef EIGEN_ALLOCA
 
-  #ifdef __arm__
+  #if defined(__arm__) || defined(_WIN32)
     #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16)
   #else
     #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA

From 6df3bee68751bd0b25c19cf4aae48c41259a4066 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 14 Feb 2014 09:58:30 +0100
Subject: [PATCH 38/48] reduce false negative in the qr unit test

---
 test/qr.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/test/qr.cpp b/test/qr.cpp
index a79e0dd34..98738777f 100644
--- a/test/qr.cpp
+++ b/test/qr.cpp
@@ -54,6 +54,8 @@ template<typename MatrixType> void qr_invertible()
 {
   using std::log;
   using std::abs;
+  using std::pow;
+  using std::max;
   typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
   typedef typename MatrixType::Scalar Scalar;
 
@@ -65,7 +67,7 @@ template<typename MatrixType> void qr_invertible()
   if (internal::is_same<RealScalar,float>::value)
   {
     // let's build a matrix more stable to inverse
-    MatrixType a = MatrixType::Random(size,size*2);
+    MatrixType a = MatrixType::Random(size,size*4);
     m1 += a * a.adjoint();
   }
 
@@ -81,8 +83,11 @@ template<typename MatrixType> void qr_invertible()
   m3 = qr.householderQ(); // get a unitary
   m1 = m3 * m1 * m3;
   qr.compute(m1);
-  VERIFY_IS_APPROX(absdet, qr.absDeterminant());
   VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
+  // This test is tricky if the determinant becomes too small.
+  // Since we generate random numbers with magnitude rrange [0,1], the average determinant is 0.5^size
+  VERIFY_IS_MUCH_SMALLER_THAN( abs(absdet-qr.absDeterminant()), (max)(RealScalar(pow(0.5,size)),(max)(abs(absdet),abs(qr.absDeterminant()))) );
+  
 }
 
 template<typename MatrixType> void qr_verify_assert()

From 0d3f496233ceb0e96da0a39e360e5bdd5c89c0e3 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 14 Feb 2014 10:18:14 +0100
Subject: [PATCH 39/48] Upload the 3.2 testing result to its own CDash project

---
 CTestConfig.cmake | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/CTestConfig.cmake b/CTestConfig.cmake
index 4c0027824..0557c491a 100644
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -4,14 +4,10 @@
 ## # The following are required to uses Dart and the Cdash dashboard
 ##   ENABLE_TESTING()
 ##   INCLUDE(CTest)
-set(CTEST_PROJECT_NAME "Eigen")
+set(CTEST_PROJECT_NAME "Eigen3.2")
 set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
 
 set(CTEST_DROP_METHOD "http")
 set(CTEST_DROP_SITE "manao.inria.fr")
-set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
+set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.2")
 set(CTEST_DROP_SITE_CDASH TRUE)
-set(CTEST_PROJECT_SUBPROJECTS
-Official
-Unsupported
-)

From 3283d98d1378ccd3a8c89eec1d88108ddd517d95 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 14 Feb 2014 14:46:01 +0100
Subject: [PATCH 40/48] optimize sparse-sparse Kronecker product

---
 .../KroneckerProduct/KroneckerTensorProduct.h | 17 +++++++++-
 unsupported/test/kronecker_product.cpp        | 34 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
index a4516056d..b8f2cba17 100644
--- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
+++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
@@ -153,7 +153,22 @@ void KroneckerProductSparse<Lhs,Rhs>::evalTo(Dest& dst) const
               Bc = m_B.cols();
   dst.resize(this->rows(), this->cols());
   dst.resizeNonZeros(0);
-  dst.reserve(m_A.nonZeros() * m_B.nonZeros());
+  
+  // compute number of non-zeros per innervectors of dst
+  {
+    VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols());
+    for (Index kA=0; kA < m_A.outerSize(); ++kA)
+      for (typename Lhs::InnerIterator itA(m_A,kA); itA; ++itA)
+        nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++;
+      
+    VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols());
+    for (Index kB=0; kB < m_B.outerSize(); ++kB)
+      for (typename Rhs::InnerIterator itB(m_B,kB); itB; ++itB)
+        nnzB(Dest::IsRowMajor ? itB.row() : itB.col())++;
+    
+    Matrix<int,Dynamic,Dynamic,ColMajor> nnzAB = nnzB * nnzA.transpose();
+    dst.reserve(VectorXi::Map(nnzAB.data(), nnzAB.size()));
+  }
 
   for (Index kA=0; kA < m_A.outerSize(); ++kA)
   {
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index c68a07de8..753a2d417 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -183,4 +183,38 @@ void test_kronecker_product()
   DM_b2.resize(4,8);
   DM_ab2 = kroneckerProduct(DM_a2,DM_b2);
   CALL_SUBTEST(check_dimension(DM_ab2,10*4,9*8));
+  
+  for(int i = 0; i < g_repeat; i++)
+  {
+    double density = Eigen::internal::random<double>(0.01,0.5);
+    int ra = Eigen::internal::random<int>(1,50);
+    int ca = Eigen::internal::random<int>(1,50);
+    int rb = Eigen::internal::random<int>(1,50);
+    int cb = Eigen::internal::random<int>(1,50);
+    SparseMatrix<float,ColMajor> sA(ra,ca), sB(rb,cb), sC;
+    SparseMatrix<float,RowMajor> sC2;
+    MatrixXf dA(ra,ca), dB(rb,cb), dC;
+    initSparse(density, dA, sA);
+    initSparse(density, dB, sB);
+    
+    sC = kroneckerProduct(sA,sB);
+    dC = kroneckerProduct(dA,dB);
+    VERIFY_IS_APPROX(MatrixXf(sC),dC);
+    
+    sC = kroneckerProduct(sA.transpose(),sB);
+    dC = kroneckerProduct(dA.transpose(),dB);
+    VERIFY_IS_APPROX(MatrixXf(sC),dC);
+    
+    sC = kroneckerProduct(sA.transpose(),sB.transpose());
+    dC = kroneckerProduct(dA.transpose(),dB.transpose());
+    VERIFY_IS_APPROX(MatrixXf(sC),dC);
+    
+    sC = kroneckerProduct(sA,sB.transpose());
+    dC = kroneckerProduct(dA,dB.transpose());
+    VERIFY_IS_APPROX(MatrixXf(sC),dC);
+    
+    sC2 = kroneckerProduct(sA,sB);
+    dC = kroneckerProduct(dA,dB);
+    VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+  }
 }

From cd606bbc943c4862cf0cf596d3ef97aae32c2887 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 14 Feb 2014 23:10:16 +0100
Subject: [PATCH 41/48] Fix infinite loop in sparselu

---
 Eigen/src/SparseLU/SparseLU_Memory.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h
index d068d62df..1ffa7d54e 100644
--- a/Eigen/src/SparseLU/SparseLU_Memory.h
+++ b/Eigen/src/SparseLU/SparseLU_Memory.h
@@ -70,7 +70,7 @@ Index  SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index
   if(num_expansions == 0 || keep_prev) 
     new_len = length ; // First time allocate requested
   else 
-    new_len = Index(alpha * length);
+    new_len = (std::max)(length+1,Index(alpha * length));
   
   VectorType old_vec; // Temporary vector to hold the previous values   
   if (nbElts > 0 )
@@ -107,7 +107,7 @@ Index  SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index
       do 
       {
         alpha = (alpha + 1)/2;
-        new_len = Index(alpha * length);
+        new_len = (std::max)(length+1,Index(alpha * length));
 #ifdef EIGEN_EXCEPTIONS
         try
 #endif

From 4b6b3f310f5b651d82a65eb546e61d70c09407c5 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sat, 15 Feb 2014 09:35:23 +0100
Subject: [PATCH 42/48] Fix a few Index to int buggy conversions

---
 .../SparseCore/ConservativeSparseSparseProduct.h   | 14 +++++++-------
 Eigen/src/SparseCore/SparseDenseProduct.h          |  4 ++--
 Eigen/src/SparseCore/SparseMatrix.h                | 10 +++++-----
 .../SparseCore/SparseSparseProductWithPruning.h    |  2 +-
 test/sparse.h                                      |  6 +++---
 test/sparse_product.cpp                            |  1 +
 test/sparse_vector.cpp                             | 13 +++++++------
 7 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index cd996d5ba..5c320e2d2 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -66,9 +66,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
     }
 
     // unordered insertion
-    for(int k=0; k<nnz; ++k)
+    for(Index k=0; k<nnz; ++k)
     {
-      int i = indices[k];
+      Index i = indices[k];
       res.insertBackByOuterInnerUnordered(j,i) = values[i];
       mask[i] = false;
     }
@@ -76,8 +76,8 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
 #if 0
     // alternative ordered insertion code:
 
-    int t200 = rows/(log2(200)*1.39);
-    int t = (rows*100)/139;
+    Index t200 = rows/(log2(200)*1.39);
+    Index t = (rows*100)/139;
 
     // FIXME reserve nnz non zeros
     // FIXME implement fast sort algorithms for very small nnz
@@ -90,9 +90,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
     if(true)
     {
       if(nnz>1) std::sort(indices.data(),indices.data()+nnz);
-      for(int k=0; k<nnz; ++k)
+      for(Index k=0; k<nnz; ++k)
       {
-        int i = indices[k];
+        Index i = indices[k];
         res.insertBackByOuterInner(j,i) = values[i];
         mask[i] = false;
       }
@@ -100,7 +100,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
     else
     {
       // dense path
-      for(int i=0; i<rows; ++i)
+      for(Index i=0; i<rows; ++i)
       {
         if(mask[i])
         {
diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h
index 6132a4880..610833f3b 100644
--- a/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -125,7 +125,7 @@ class SparseDenseOuterProduct<Lhs,Rhs,Transpose>::InnerIterator : public _LhsNes
     inline Scalar value() const { return Base::value() * m_factor; }
 
   protected:
-    int m_outer;
+    Index m_outer;
     Scalar m_factor;
 };
 
@@ -156,7 +156,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
   {
     for(Index c=0; c<rhs.cols(); ++c)
     {
-      int n = lhs.outerSize();
+      Index n = lhs.outerSize();
       for(Index j=0; j<n; ++j)
       {
         typename Res::Scalar tmp(0);
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index f2e234eee..0e7e760fb 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -402,7 +402,7 @@ class SparseMatrix
       * \sa insertBack, insertBackByOuterInner */
     inline void startVec(Index outer)
     {
-      eigen_assert(m_outerIndex[outer]==int(m_data.size()) && "You must call startVec for each inner vector sequentially");
+      eigen_assert(m_outerIndex[outer]==Index(m_data.size()) && "You must call startVec for each inner vector sequentially");
       eigen_assert(m_outerIndex[outer+1]==0 && "You must call startVec for each inner vector sequentially");
       m_outerIndex[outer+1] = m_outerIndex[outer];
     }
@@ -480,7 +480,7 @@ class SparseMatrix
       if(m_innerNonZeros != 0)
         return; 
       m_innerNonZeros = static_cast<Index*>(std::malloc(m_outerSize * sizeof(Index)));
-      for (int i = 0; i < m_outerSize; i++)
+      for (Index i = 0; i < m_outerSize; i++)
       {
         m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; 
       }
@@ -752,8 +752,8 @@ class SparseMatrix
         else
           for (Index i=0; i<m.outerSize(); ++i)
           {
-            int p = m.m_outerIndex[i];
-            int pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
+            Index p = m.m_outerIndex[i];
+            Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
             Index k=p;
             for (; k<pe; ++k)
               s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") ";
@@ -1022,7 +1022,7 @@ void SparseMatrix<Scalar,_Options,_Index>::sumupDuplicates()
   wi.fill(-1);
   Index count = 0;
   // for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers
-  for(int j=0; j<outerSize(); ++j)
+  for(Index j=0; j<outerSize(); ++j)
   {
     Index start   = count;
     Index oldEnd  = m_outerIndex[j]+m_innerNonZeros[j];
diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
index 1dccdfe1b..fcc18f5c9 100644
--- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
+++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
@@ -27,7 +27,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
   // make sure to call innerSize/outerSize since we fake the storage order.
   Index rows = lhs.innerSize();
   Index cols = rhs.outerSize();
-  //int size = lhs.outerSize();
+  //Index size = lhs.outerSize();
   eigen_assert(lhs.outerSize() == rhs.innerSize());
 
   // allocate a temporary buffer
diff --git a/test/sparse.h b/test/sparse.h
index a09c65e5f..433d48ec4 100644
--- a/test/sparse.h
+++ b/test/sparse.h
@@ -154,16 +154,16 @@ initSparse(double density,
   sparseMat.finalize();
 }
 
-template<typename Scalar> void
+template<typename Scalar,int Options,typename Index> void
 initSparse(double density,
            Matrix<Scalar,Dynamic,1>& refVec,
-           SparseVector<Scalar>& sparseVec,
+           SparseVector<Scalar,Options,Index>& sparseVec,
            std::vector<int>* zeroCoords = 0,
            std::vector<int>* nonzeroCoords = 0)
 {
   sparseVec.reserve(int(refVec.size()*density));
   sparseVec.setZero();
-  for(int i=0; i<refVec.size(); i++)
+  for(Index i=0; i<refVec.size(); i++)
   {
     Scalar v = (internal::random<double>(0,1) < density) ? internal::random<Scalar>() : Scalar(0);
     if (v!=Scalar(0))
diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp
index 664e33887..948b0026b 100644
--- a/test/sparse_product.cpp
+++ b/test/sparse_product.cpp
@@ -244,6 +244,7 @@ void test_sparse_product()
     CALL_SUBTEST_1( (sparse_product<SparseMatrix<double,RowMajor> >()) );
     CALL_SUBTEST_2( (sparse_product<SparseMatrix<std::complex<double>, ColMajor > >()) );
     CALL_SUBTEST_2( (sparse_product<SparseMatrix<std::complex<double>, RowMajor > >()) );
+    CALL_SUBTEST_3( (sparse_product<SparseMatrix<float,ColMajor,long int> >()) );
     CALL_SUBTEST_4( (sparse_product_regression_test<SparseMatrix<double,RowMajor>, Matrix<double, Dynamic, Dynamic, RowMajor> >()) );
   }
 }
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index ec5877b6a..0c9476803 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -9,14 +9,14 @@
 
 #include "sparse.h"
 
-template<typename Scalar> void sparse_vector(int rows, int cols)
+template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
 {
   double densityMat = (std::max)(8./(rows*cols), 0.01);
   double densityVec = (std::max)(8./float(rows), 0.1);
   typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
   typedef Matrix<Scalar,Dynamic,1> DenseVector;
-  typedef SparseVector<Scalar> SparseVectorType;
-  typedef SparseMatrix<Scalar> SparseMatrixType;
+  typedef SparseVector<Scalar,0,Index> SparseVectorType;
+  typedef SparseMatrix<Scalar,0,Index> SparseMatrixType;
   Scalar eps = 1e-6;
 
   SparseMatrixType m1(rows,rows);
@@ -101,9 +101,10 @@ template<typename Scalar> void sparse_vector(int rows, int cols)
 void test_sparse_vector()
 {
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_1( sparse_vector<double>(8, 8) );
-    CALL_SUBTEST_2( sparse_vector<std::complex<double> >(16, 16) );
-    CALL_SUBTEST_1( sparse_vector<double>(299, 535) );
+    CALL_SUBTEST_1(( sparse_vector<double,int>(8, 8) ));
+    CALL_SUBTEST_2(( sparse_vector<std::complex<double>, int>(16, 16) ));
+    CALL_SUBTEST_1(( sparse_vector<double,long int>(299, 535) ));
+    CALL_SUBTEST_1(( sparse_vector<double,short>(299, 535) ));
   }
 }
 

From 3bb57e21a8e56917e490a514ab7ebfd3cd3811ab Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 17 Feb 2014 09:56:46 +0100
Subject: [PATCH 43/48] Fix FFTW unit test with clang

---
 unsupported/test/FFTW.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index a07bf274b..d3718e2d2 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -16,9 +16,6 @@ std::complex<T> RandomCpx() { return std::complex<T>( (T)(rand()/(T)RAND_MAX - .
 using namespace std;
 using namespace Eigen;
 
-float norm(float x) {return x*x;}
-double norm(double x) {return x*x;}
-long double norm(long double x) {return x*x;}
 
 template < typename T>
 complex<long double>  promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
@@ -40,11 +37,11 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
             for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
                 acc +=  promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
             }
-            totalpower += norm(acc);
+            totalpower += numext::abs2(acc);
             complex<long double> x = promote(fftbuf[k0]); 
             complex<long double> dif = acc - x;
-            difpower += norm(dif);
-            //cerr << k0 << "\t" << acc << "\t" <<  x << "\t" << sqrt(norm(dif)) << endl;
+            difpower += numext::abs2(dif);
+            //cerr << k0 << "\t" << acc << "\t" <<  x << "\t" << sqrt(numext::abs2(dif)) << endl;
         }
         cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
         return sqrt(difpower/totalpower);
@@ -57,8 +54,8 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
         long double difpower=0;
         size_t n = (min)( buf1.size(),buf2.size() );
         for (size_t k=0;k<n;++k) {
-            totalpower += (norm( buf1[k] ) + norm(buf2[k]) )/2.;
-            difpower += norm(buf1[k] - buf2[k]);
+            totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.;
+            difpower += numext::abs2(buf1[k] - buf2[k]);
         }
         return sqrt(difpower/totalpower);
     }

From ed461ba9bcb809767546a7ff3852a1dcc5e7e477 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 17 Feb 2014 09:57:47 +0100
Subject: [PATCH 44/48] Fix sparse_product/sparse_extra unit tests

---
 test/sparse.h           |  4 ++--
 test/sparse_product.cpp | 16 +++++++++-------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/test/sparse.h b/test/sparse.h
index 433d48ec4..e19a76316 100644
--- a/test/sparse.h
+++ b/test/sparse.h
@@ -178,10 +178,10 @@ initSparse(double density,
   }
 }
 
-template<typename Scalar> void
+template<typename Scalar,int Options,typename Index> void
 initSparse(double density,
            Matrix<Scalar,1,Dynamic>& refVec,
-           SparseVector<Scalar,RowMajor>& sparseVec,
+           SparseVector<Scalar,Options,Index>& sparseVec,
            std::vector<int>* zeroCoords = 0,
            std::vector<int>* nonzeroCoords = 0)
 {
diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp
index 948b0026b..a2ea9d5b7 100644
--- a/test/sparse_product.cpp
+++ b/test/sparse_product.cpp
@@ -13,8 +13,9 @@ template<typename SparseMatrixType, typename DenseMatrix, bool IsRowMajor=Sparse
 
 template<typename SparseMatrixType, typename DenseMatrix> struct test_outer<SparseMatrixType,DenseMatrix,false> {
   static void run(SparseMatrixType& m2, SparseMatrixType& m4, DenseMatrix& refMat2, DenseMatrix& refMat4) {
-    int c  = internal::random(0,m2.cols()-1);
-    int c1 = internal::random(0,m2.cols()-1);
+    typedef typename SparseMatrixType::Index Index;
+    Index c  = internal::random<Index>(0,m2.cols()-1);
+    Index c1 = internal::random<Index>(0,m2.cols()-1);
     VERIFY_IS_APPROX(m4=m2.col(c)*refMat2.col(c1).transpose(), refMat4=refMat2.col(c)*refMat2.col(c1).transpose());
     VERIFY_IS_APPROX(m4=refMat2.col(c1)*m2.col(c).transpose(), refMat4=refMat2.col(c1)*refMat2.col(c).transpose());
   }
@@ -22,8 +23,9 @@ template<typename SparseMatrixType, typename DenseMatrix> struct test_outer<Spar
 
 template<typename SparseMatrixType, typename DenseMatrix> struct test_outer<SparseMatrixType,DenseMatrix,true> {
   static void run(SparseMatrixType& m2, SparseMatrixType& m4, DenseMatrix& refMat2, DenseMatrix& refMat4) {
-    int r  = internal::random(0,m2.rows()-1);
-    int c1 = internal::random(0,m2.cols()-1);
+    typedef typename SparseMatrixType::Index Index;
+    Index r  = internal::random<Index>(0,m2.rows()-1);
+    Index c1 = internal::random<Index>(0,m2.cols()-1);
     VERIFY_IS_APPROX(m4=m2.row(r).transpose()*refMat2.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*refMat2.col(c1).transpose());
     VERIFY_IS_APPROX(m4=refMat2.col(c1)*m2.row(r), refMat4=refMat2.col(c1)*refMat2.row(r));
   }
@@ -37,9 +39,9 @@ template<typename SparseMatrixType> void sparse_product()
 {
   typedef typename SparseMatrixType::Index Index;
   Index n = 100;
-  const Index rows  = internal::random<int>(1,n);
-  const Index cols  = internal::random<int>(1,n);
-  const Index depth = internal::random<int>(1,n);
+  const Index rows  = internal::random<Index>(1,n);
+  const Index cols  = internal::random<Index>(1,n);
+  const Index depth = internal::random<Index>(1,n);
   typedef typename SparseMatrixType::Scalar Scalar;
   enum { Flags = SparseMatrixType::Flags };
 

From bd6eca059d1026743188adecebd33ece12a23184 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 17 Feb 2014 10:00:38 +0100
Subject: [PATCH 45/48] Fix compilation of SPlines module

---
 unsupported/Eigen/src/Splines/SplineFwd.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsupported/Eigen/src/Splines/SplineFwd.h b/unsupported/Eigen/src/Splines/SplineFwd.h
index 5d9bfacf8..9ea23a9a1 100644
--- a/unsupported/Eigen/src/Splines/SplineFwd.h
+++ b/unsupported/Eigen/src/Splines/SplineFwd.h
@@ -64,6 +64,8 @@ namespace Eigen
     {
       enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. */ };
       enum { NumOfDerivativesAtCompileTime = _DerivativeOrder==Dynamic ? Dynamic : _DerivativeOrder+1 /*!< The number of derivatives defined for the current spline. */ };
+      
+      enum { DerivativeMemoryLayout = _Dim==1 ? RowMajor : ColMajor /*!< The derivative type's memory layout. */ };
 
       /** \brief The data type used to store the values of the basis function derivatives. */
       typedef Array<_Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType;

From 3573a107120c3762c1f9b6b17f62d817ee1a9ec0 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 17 Feb 2014 13:46:17 +0100
Subject: [PATCH 46/48] Fix support for row (resp. column) of a column-major
 (resp. row-major) sparse matrix

---
 Eigen/src/SparseCore/SparseBlock.h | 133 +++++++++++++++++++++++------
 test/sparse_basic.cpp              |   8 ++
 2 files changed, 115 insertions(+), 26 deletions(-)

diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index d0436aa7c..4fa98c01c 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -335,6 +335,14 @@ const Block<const Derived,Dynamic,Dynamic,true> SparseMatrixBase<Derived>::inner
   
 }
 
+namespace internal {
+  
+template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel,
+          bool OuterVector =  (BlockCols==1 && XprType::IsRowMajor) || (BlockRows==1 && !XprType::IsRowMajor)>
+class GenericSparseBlockInnerIteratorImpl;
+
+}
+
 /** Generic implementation of sparse Block expression.
   * Real-only. 
   */
@@ -354,8 +362,8 @@ public:
       : m_matrix(xpr),
         m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
         m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
-        m_blockRows(xpr.rows()),
-        m_blockCols(xpr.cols())
+        m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
+        m_blockCols(BlockCols==1 ? 1 : xpr.cols())
     {}
 
     /** Dynamic-size constructor
@@ -394,29 +402,8 @@ public:
     
     inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
     
-    class InnerIterator : public _MatrixTypeNested::InnerIterator
-    {
-      typedef typename _MatrixTypeNested::InnerIterator Base;
-      const BlockType& m_block;
-      Index m_end;
-    public:
-
-      EIGEN_STRONG_INLINE InnerIterator(const BlockType& block, Index outer)
-        : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
-          m_block(block),
-          m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
-      {
-        while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
-          Base::operator++();
-      }
-
-      inline Index index()  const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
-      inline Index outer()  const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
-      inline Index row()    const { return Base::row()   - m_block.m_startRow.value(); }
-      inline Index col()    const { return Base::col()   - m_block.m_startCol.value(); }
-      
-      inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
-    };
+    typedef internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel> InnerIterator;
+    
     class ReverseInnerIterator : public _MatrixTypeNested::ReverseInnerIterator
     {
       typedef typename _MatrixTypeNested::ReverseInnerIterator Base;
@@ -441,7 +428,7 @@ public:
       inline operator bool() const { return Base::operator bool() && Base::index() >= m_begin; }
     };
   protected:
-    friend class InnerIterator;
+    friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
     friend class ReverseInnerIterator;
     
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
@@ -454,6 +441,100 @@ public:
 
 };
 
+namespace internal {
+  template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+  class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,false> : public Block<XprType, BlockRows, BlockCols, InnerPanel>::_MatrixTypeNested::InnerIterator
+  {
+    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+    enum {
+      IsRowMajor = BlockType::IsRowMajor
+    };
+    typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
+    typedef typename BlockType::Index Index;
+    typedef typename _MatrixTypeNested::InnerIterator Base;
+    const BlockType& m_block;
+    Index m_end;
+  public:
+
+    EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer)
+      : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
+        m_block(block),
+        m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
+    {
+      while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
+        Base::operator++();
+    }
+    
+    inline Index index()  const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
+    inline Index outer()  const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
+    inline Index row()    const { return Base::row()   - m_block.m_startRow.value(); }
+    inline Index col()    const { return Base::col()   - m_block.m_startCol.value(); }
+    
+    inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
+  };
+  
+  // Row vector of a column-major sparse matrix or column of a row-major one.
+  template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+  class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,true>
+  {
+    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+    enum {
+      IsRowMajor = BlockType::IsRowMajor
+    };
+    typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
+    typedef typename BlockType::Index Index;
+    typedef typename BlockType::Scalar Scalar;
+    const BlockType& m_block;
+    Index m_outerPos;
+    Index m_innerIndex;
+    Scalar m_value;
+    Index m_end;
+  public:
+
+    EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
+      : 
+        m_block(block),
+        m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
+        m_innerIndex(IsRowMajor ? block.m_startRow.value() : block.m_startCol.value()),
+        m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
+    {
+      EIGEN_UNUSED_VARIABLE(outer);
+      eigen_assert(outer==0);
+      
+      ++(*this);
+    }
+    
+    inline Index index()  const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
+    inline Index outer()  const { return 0; }
+    inline Index row()    const { return IsRowMajor ? 0 : index(); }
+    inline Index col()    const { return IsRowMajor ? index() : 0; }
+    
+    inline Scalar value() const { return m_value; }
+    
+    inline GenericSparseBlockInnerIteratorImpl& operator++()
+    {
+      // search next non-zero entry
+      while(m_outerPos<m_end)
+      {
+        m_outerPos++;
+        typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
+        // search for the key m_innerIndex in the current outer-vector
+        while(it && it.index() < m_innerIndex) ++it;
+        if(it && it.index()==m_innerIndex)
+        {
+          m_value = it.value();
+          break;
+        }
+      }
+      return *this;
+    }
+    
+    inline operator bool() const { return m_outerPos < m_end; }
+  };
+  
+} // end namespace internal
+
+
 } // end namespace Eigen
 
 #endif // EIGEN_SPARSE_BLOCK_H
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index d5ff9f80b..6c620f037 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -270,6 +270,14 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
       VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.row(0).dot(refM2.row(0)));
     else
       VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.col(0).dot(refM2.row(0)));
+    
+    DenseVector rv = DenseVector::Random(m1.cols());
+    DenseVector cv = DenseVector::Random(m1.rows());
+    Index r = internal::random<Index>(0,m1.rows()-2);
+    Index c = internal::random<Index>(0,m1.cols()-1);
+    VERIFY_IS_APPROX(( m1.template block<1,Dynamic>(r,0,1,m1.cols()).dot(rv)) , refM1.row(r).dot(rv));
+    VERIFY_IS_APPROX(m1.row(r).dot(rv), refM1.row(r).dot(rv));
+    VERIFY_IS_APPROX(m1.col(c).dot(cv), refM1.col(c).dot(cv));
 
     VERIFY_IS_APPROX(m1.conjugate(), refM1.conjugate());
     VERIFY_IS_APPROX(m1.real(), refM1.real());

From b14a4628aff4b92761229c862a331342a2bb518b Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Mon, 17 Feb 2014 13:48:00 +0100
Subject: [PATCH 47/48] Relaxed umeyama test. Problem was ill-posed if linear
 part was scaled with very small number. This should fix bug #744.

---
 test/umeyama.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/test/umeyama.cpp b/test/umeyama.cpp
index 738d0af70..2e8092434 100644
--- a/test/umeyama.cpp
+++ b/test/umeyama.cpp
@@ -130,10 +130,11 @@ void run_fixed_size_test(int num_elements)
 
   // MUST be positive because in any other case det(cR_t) may become negative for
   // odd dimensions!
-  const Scalar c = abs(internal::random<Scalar>());
+  // Also if c is to small compared to t.norm(), problem is ill-posed (cf. Bug 744)
+  const Scalar c = internal::random<Scalar>(0.5, 2.0);
 
   FixedMatrix R = randMatrixSpecialUnitary<Scalar>(dim);
-  FixedVector t = Scalar(50)*FixedVector::Random(dim,1);
+  FixedVector t = Scalar(32)*FixedVector::Random(dim,1);
 
   HomMatrix cR_t = HomMatrix::Identity(dim+1,dim+1);
   cR_t.block(0,0,dim,dim) = c*R;
@@ -149,9 +150,9 @@ void run_fixed_size_test(int num_elements)
 
   HomMatrix cR_t_umeyama = umeyama(src_block, dst_block);
 
-  const Scalar error = ( cR_t_umeyama*src - dst ).array().square().sum();
+  const Scalar error = ( cR_t_umeyama*src - dst ).squaredNorm();
 
-  VERIFY(error < Scalar(10)*std::numeric_limits<Scalar>::epsilon());
+  VERIFY(error < Scalar(16)*std::numeric_limits<Scalar>::epsilon());
 }
 
 void test_umeyama()

From 37a1d736bfd6fdaa0e1a93aee585d1861ab4004a Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Tue, 18 Feb 2014 13:35:24 +0100
Subject: [PATCH 48/48] _MatrixTypeNested must be public in sparse Block

---
 Eigen/src/SparseCore/SparseBlock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index 4fa98c01c..6f615e250 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -350,11 +350,12 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
 class BlockImpl<XprType,BlockRows,BlockCols,InnerPanel,Sparse>
   : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,InnerPanel> >, internal::no_assignment_operator
 {
-  typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;
   typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
 public:
     enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
     EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
+    
+    typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;
 
     /** Column or Row constructor
       */