diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues
index 9a6443f39..8c6841549 100644
--- a/Eigen/Eigenvalues
+++ b/Eigen/Eigenvalues
@@ -8,6 +8,7 @@
 #include "Cholesky"
 #include "Jacobi"
 #include "Householder"
+#include "LU"
 
 // Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module
 #if (defined EIGEN_EXTERN_INSTANTIATIONS) && (EIGEN_EXTERN_INSTANTIATIONS>=2)
diff --git a/Eigen/src/Array/VectorwiseOp.h b/Eigen/src/Array/VectorwiseOp.h
index 4cb0083fa..880567212 100644
--- a/Eigen/src/Array/VectorwiseOp.h
+++ b/Eigen/src/Array/VectorwiseOp.h
@@ -122,6 +122,7 @@ class PartialReduxExpr : ei_no_assignment_operator,
 EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
 EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
@@ -297,6 +298,13 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
     const typename ReturnType<ei_member_sum>::Type sum() const
     { return _expression(); }
 
+    /** \returns a row (or column) vector expression of the mean
+    * of each column (or row) of the referenced expression.
+    *
+    * \sa MatrixBase::mean() */
+    const typename ReturnType<ei_member_mean>::Type mean() const
+    { return _expression(); }
+
     /** \returns a row (or column) vector expression representing
       * whether \b all coefficients of each respective column (or row) are \c true.
       *
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 4bd1046a7..8dc015715 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -93,7 +93,7 @@ public:
               ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
               : int(NoUnrolling)
   };
-  
+
   static void debug()
   {
     EIGEN_DEBUG_VAR(DstIsAligned)
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index cbaeb83e2..259f40244 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -350,7 +350,7 @@ struct ei_scalar_multiple_op {
   EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
   EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
   { return ei_pmul(a, ei_pset1(m_other)); }
-  const Scalar m_other;
+  typename ei_makeconst<typename NumTraits<Scalar>::Nested>::type m_other;
 private:
   ei_scalar_multiple_op& operator=(const ei_scalar_multiple_op&);
 };
@@ -364,7 +364,7 @@ struct ei_scalar_multiple2_op {
   EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { }
   EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
   EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
-  const Scalar2 m_other;
+  typename ei_makeconst<typename NumTraits<Scalar2>::Nested>::type m_other;
 };
 template<typename Scalar1,typename Scalar2>
 struct ei_functor_traits<ei_scalar_multiple2_op<Scalar1,Scalar2> >
@@ -393,7 +393,7 @@ struct ei_scalar_quotient1_impl<Scalar,false> {
   EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const ei_scalar_quotient1_impl& other) : m_other(other.m_other) { }
   EIGEN_STRONG_INLINE ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {}
   EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
-  const Scalar m_other;
+  typename ei_makeconst<typename NumTraits<Scalar>::Nested>::type m_other;
 };
 template<typename Scalar>
 struct ei_functor_traits<ei_scalar_quotient1_impl<Scalar,false> >
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 7b5310175..e5eed715b 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -145,12 +145,6 @@ template<typename Derived> class MatrixBase
 #endif
     };
 
-    /** Default constructor. Just checks at compile-time for self-consistency of the flags. */
-    MatrixBase()
-    {
-      ei_assert(ei_are_flags_consistent<Flags>::ret);
-    }
-
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** This is the "real scalar" type; if the \a Scalar type is already real numbers
       * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If
@@ -177,7 +171,7 @@ template<typename Derived> class MatrixBase
     inline int diagonalSize() const { return std::min(rows(),cols()); }
     /** \returns the number of nonzero coefficients which is in practice the number
       * of stored coefficients. */
-    inline int nonZeros() const { return derived().nonZeros(); }
+    inline int nonZeros() const { return size(); }
     /** \returns true if either the number of rows or the number of columns is equal to 1.
       * In other words, this function returns
       * \code rows()==1 || cols()==1 \endcode
@@ -645,8 +639,9 @@ template<typename Derived> class MatrixBase
     const CwiseBinaryOp<CustomBinaryOp, Derived, OtherDerived>
     binaryExpr(const MatrixBase<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const;
 
-
+    
     Scalar sum() const;
+    Scalar mean() const;
     Scalar trace() const;
 
     Scalar prod() const;
@@ -811,6 +806,24 @@ template<typename Derived> class MatrixBase
     #ifdef EIGEN_MATRIXBASE_PLUGIN
     #include EIGEN_MATRIXBASE_PLUGIN
     #endif
+
+  protected:
+    /** Default constructor. Do nothing. */
+    MatrixBase()
+    {
+      /* Just checks for self-consistency of the flags.
+       * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
+       */
+#ifdef EIGEN_INTERNAL_DEBUGGING
+      EIGEN_STATIC_ASSERT(ei_are_flags_consistent<Flags>::ret,
+                          INVALID_MATRIXBASE_TEMPLATE_PARAMETERS)
+#endif
+    }
+    
+  private:
+    explicit MatrixBase(int);
+    MatrixBase(int,int);
+    template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
 };
 
 #endif // EIGEN_MATRIXBASE_H
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 24afe54c5..304e2c1d6 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -52,6 +52,7 @@ template<> struct NumTraits<int>
 {
   typedef int Real;
   typedef double FloatingPoint;
+  typedef int Nested;
   enum {
     IsComplex = 0,
     HasFloatingPoint = 0,
@@ -65,6 +66,7 @@ template<> struct NumTraits<float>
 {
   typedef float Real;
   typedef float FloatingPoint;
+  typedef float Nested;
   enum {
     IsComplex = 0,
     HasFloatingPoint = 1,
@@ -78,6 +80,7 @@ template<> struct NumTraits<double>
 {
   typedef double Real;
   typedef double FloatingPoint;
+  typedef double Nested;
   enum {
     IsComplex = 0,
     HasFloatingPoint = 1,
@@ -91,6 +94,7 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
 {
   typedef _Real Real;
   typedef std::complex<_Real> FloatingPoint;
+  typedef std::complex<_Real> Nested;
   enum {
     IsComplex = 1,
     HasFloatingPoint = NumTraits<Real>::HasFloatingPoint,
@@ -104,6 +108,7 @@ template<> struct NumTraits<long long int>
 {
   typedef long long int Real;
   typedef long double FloatingPoint;
+  typedef long long int Nested;
   enum {
     IsComplex = 0,
     HasFloatingPoint = 0,
@@ -117,6 +122,7 @@ template<> struct NumTraits<long double>
 {
   typedef long double Real;
   typedef long double FloatingPoint;
+  typedef long double Nested;
   enum {
     IsComplex = 0,
     HasFloatingPoint = 1,
@@ -130,6 +136,7 @@ template<> struct NumTraits<bool>
 {
   typedef bool Real;
   typedef float FloatingPoint;
+  typedef bool Nested;
   enum {
     IsComplex = 0,
     HasFloatingPoint = 0,
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 0df095750..9f796157a 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -342,7 +342,7 @@ MatrixBase<Derived>::maxCoeff() const
 
 /** \returns the sum of all coefficients of *this
   *
-  * \sa trace(), prod()
+  * \sa trace(), prod(), mean()
   */
 template<typename Derived>
 EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
@@ -351,12 +351,23 @@ MatrixBase<Derived>::sum() const
   return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
 }
 
+/** \returns the mean of all coefficients of *this
+*
+* \sa trace(), prod(), sum()
+*/
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
+MatrixBase<Derived>::mean() const
+{
+  return this->redux(Eigen::ei_scalar_sum_op<Scalar>()) / this->size();
+}
+
 /** \returns the product of all coefficients of *this
   *
   * Example: \include MatrixBase_prod.cpp
   * Output: \verbinclude MatrixBase_prod.out
   *
-  * \sa sum()
+  * \sa sum(), mean(), trace()
   */
 template<typename Derived>
 EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index 8527edc2a..990aa3807 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -69,7 +69,6 @@ template<typename MatrixType> class Transpose
 
     inline int rows() const { return m_matrix.cols(); }
     inline int cols() const { return m_matrix.rows(); }
-    inline int nonZeros() const { return m_matrix.nonZeros(); }
     inline int stride() const { return m_matrix.stride(); }
     inline Scalar* data() { return m_matrix.data(); }
     inline const Scalar* data() const { return m_matrix.data(); }
@@ -354,5 +353,5 @@ lazyAssign(const CwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerivedA,CwiseUnaryOp<ei
   return lazyAssign(static_cast<const MatrixBase<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerivedA,CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, NestByValue<Eigen::Transpose<DerivedB> > > > >& >(other));
 }
 #endif
-    
+
 #endif // EIGEN_TRANSPOSE_H
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index eb1c2d311..60ccadc21 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -220,8 +220,14 @@ template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2
 template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float*  to, const Packet4f& from) { ei_pstoreu((double*)to, _mm_castps_pd(from)); }
 template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int*      to, const Packet4i& from) { ei_pstoreu((double*)to, _mm_castsi128_pd(from)); }
 
-#ifdef _MSC_VER
-// this fix internal compilation error
+#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64)
+// The temporary variable fixes an internal compilation error.
+// Direct of the struct members fixed bug #62.
+template<> EIGEN_STRONG_INLINE float  ei_pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int    ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif defined(_MSC_VER) && (_MSC_VER <= 1500)
+// The temporary variable fixes an internal compilation error.
 template<> EIGEN_STRONG_INLINE float  ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
 template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
 template<> EIGEN_STRONG_INLINE int    ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index f8581eebc..bc5235582 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -83,7 +83,7 @@ inline void* ei_aligned_malloc(size_t size)
     ei_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
   #endif
 
-  void *result;  
+  void *result;
   #if !EIGEN_ALIGN
     result = malloc(size);
   #elif EIGEN_MALLOC_ALREADY_ALIGNED
@@ -97,7 +97,7 @@ inline void* ei_aligned_malloc(size_t size)
   #else
     result = ei_handmade_aligned_malloc(size);
   #endif
-    
+
   #ifdef EIGEN_EXCEPTIONS
     if(result == 0)
       throw std::bad_alloc();
@@ -324,34 +324,34 @@ public:
         typedef aligned_allocator<U> other;
     };
 
-    pointer address( reference value ) const 
+    pointer address( reference value ) const
     {
         return &value;
     }
 
-    const_pointer address( const_reference value ) const 
+    const_pointer address( const_reference value ) const
     {
         return &value;
     }
 
-    aligned_allocator() throw() 
+    aligned_allocator() throw()
     {
     }
 
-    aligned_allocator( const aligned_allocator& ) throw() 
+    aligned_allocator( const aligned_allocator& ) throw()
     {
     }
 
     template<class U>
-    aligned_allocator( const aligned_allocator<U>& ) throw() 
+    aligned_allocator( const aligned_allocator<U>& ) throw()
     {
     }
 
-    ~aligned_allocator() throw() 
+    ~aligned_allocator() throw()
     {
     }
 
-    size_type max_size() const throw() 
+    size_type max_size() const throw()
     {
         return std::numeric_limits<size_type>::max();
     }
@@ -362,24 +362,24 @@ public:
         return static_cast<pointer>( ei_aligned_malloc( num * sizeof(T) ) );
     }
 
-    void construct( pointer p, const T& value ) 
+    void construct( pointer p, const T& value )
     {
         ::new( p ) T( value );
     }
 
-    void destroy( pointer p ) 
+    void destroy( pointer p )
     {
         p->~T();
     }
 
-    void deallocate( pointer p, size_type /*num*/ ) 
+    void deallocate( pointer p, size_type /*num*/ )
     {
         ei_aligned_free( p );
     }
-    
+
     bool operator!=(const aligned_allocator<T>& other) const
     { return false; }
-    
+
     bool operator==(const aligned_allocator<T>& other) const
     { return true; }
 };
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 3a960bea6..2fdfd93b5 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -64,6 +64,13 @@ template<typename T> struct ei_cleantype<T&>        { typedef typename ei_cleant
 template<typename T> struct ei_cleantype<const T*>  { typedef typename ei_cleantype<T>::type type; };
 template<typename T> struct ei_cleantype<T*>        { typedef typename ei_cleantype<T>::type type; };
 
+template<typename T> struct ei_makeconst            { typedef const T type;  };
+template<typename T> struct ei_makeconst<const T>   { typedef const T type;  };
+template<typename T> struct ei_makeconst<T&>        { typedef const T& type; };
+template<typename T> struct ei_makeconst<const T&>  { typedef const T& type; };
+template<typename T> struct ei_makeconst<T*>        { typedef const T* type; };
+template<typename T> struct ei_makeconst<const T*>  { typedef const T* type; };
+
 /** \internal Allows to enable/disable an overload
   * according to a compile time condition.
   */
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 6210bc91c..56a57b706 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -76,6 +76,7 @@
         THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
         THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
         INVALID_MATRIX_TEMPLATE_PARAMETERS,
+        INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
         BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
         THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
         THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index cea2faaa8..be4266f85 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -1,4 +1,4 @@
-// This file is part of Eigen, a lightweight C++ template library
+// // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index 0534715c4..a25af342d 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -167,10 +167,11 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
     //locate the range in which to iterate
     while(iu > 0)
     {
-      d = ei_norm1(m_matT.coeffRef(iu,iu)) + ei_norm1(m_matT.coeffRef(iu-1,iu-1));
-      sd = ei_norm1(m_matT.coeffRef(iu,iu-1));
+      d = ei_norm1(m_matT.coeff(iu,iu)) + ei_norm1(m_matT.coeff(iu-1,iu-1));
+      sd = ei_norm1(m_matT.coeff(iu,iu-1));
 
-      if(sd >= eps * d) break; // FIXME : precision criterion ??
+      if(!ei_isMuchSmallerThan(sd,d,eps))
+        break;
 
       m_matT.coeffRef(iu,iu-1) = Complex(0);
       iter = 0;
@@ -187,13 +188,14 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
     }
 
     il = iu-1;
-    while( il > 0 )
+    while(il > 0)
     {
       // check if the current 2x2 block on the diagonal is upper triangular
-      d = ei_norm1(m_matT.coeffRef(il,il)) + ei_norm1(m_matT.coeffRef(il-1,il-1));
-      sd = ei_norm1(m_matT.coeffRef(il,il-1));
+      d = ei_norm1(m_matT.coeff(il,il)) + ei_norm1(m_matT.coeff(il-1,il-1));
+      sd = ei_norm1(m_matT.coeff(il,il-1));
 
-      if(sd < eps * d) break; // FIXME : precision criterion ??
+      if(ei_isMuchSmallerThan(sd,d,eps))
+        break;
 
       --il;
     }
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 67b040165..b08a027c9 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -26,6 +26,155 @@
 #ifndef EIGEN_QUATERNION_H
 #define EIGEN_QUATERNION_H
 
+/***************************************************************************
+* Definition of QuaternionBase<Derived>
+* The implementation is at the end of the file
+***************************************************************************/
+
+template<typename Other,
+         int OtherRows=Other::RowsAtCompileTime,
+         int OtherCols=Other::ColsAtCompileTime>
+struct ei_quaternionbase_assign_impl;
+
+template<class Derived>
+class QuaternionBase : public RotationBase<Derived, 3>
+{
+  typedef RotationBase<Derived, 3> Base;
+public:
+  using Base::operator*;
+  using Base::derived;
+
+  typedef typename ei_traits<Derived>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef typename ei_traits<Derived>::Coefficients Coefficients;
+
+ // typedef typename Matrix<Scalar,4,1> Coefficients;
+  /** the type of a 3D vector */
+  typedef Matrix<Scalar,3,1> Vector3;
+  /** the equivalent rotation matrix type */
+  typedef Matrix<Scalar,3,3> Matrix3;
+  /** the equivalent angle-axis type */
+  typedef AngleAxis<Scalar> AngleAxisType;
+
+
+
+  /** \returns the \c x coefficient */
+  inline Scalar x() const { return this->derived().coeffs().coeff(0); }
+  /** \returns the \c y coefficient */
+  inline Scalar y() const { return this->derived().coeffs().coeff(1); }
+  /** \returns the \c z coefficient */
+  inline Scalar z() const { return this->derived().coeffs().coeff(2); }
+  /** \returns the \c w coefficient */
+  inline Scalar w() const { return this->derived().coeffs().coeff(3); }
+
+  /** \returns a reference to the \c x coefficient */
+  inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
+  /** \returns a reference to the \c y coefficient */
+  inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
+  /** \returns a reference to the \c z coefficient */
+  inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
+  /** \returns a reference to the \c w coefficient */
+  inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
+
+  /** \returns a read-only vector expression of the imaginary part (x,y,z) */
+  inline const VectorBlock<Coefficients,3> vec() const { return coeffs().template start<3>(); }
+
+  /** \returns a vector expression of the imaginary part (x,y,z) */
+  inline VectorBlock<Coefficients,3> vec() { return coeffs().template start<3>(); }
+
+  /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
+  inline const typename ei_traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
+
+  /** \returns a vector expression of the coefficients (x,y,z,w) */
+  inline typename ei_traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
+
+  template<class OtherDerived> Derived& operator=(const QuaternionBase<OtherDerived>& other);
+
+// disabled this copy operator as it is giving very strange compilation errors when compiling
+// test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's
+// useful; however notice that we already have the templated operator= above and e.g. in MatrixBase
+// we didn't have to add, in addition to templated operator=, such a non-templated copy operator.
+//  Derived& operator=(const QuaternionBase& other)
+//  { return operator=<Derived>(other); }
+
+  Derived& operator=(const AngleAxisType& aa);
+  template<class OtherDerived> Derived& operator=(const MatrixBase<OtherDerived>& m);
+
+  /** \returns a quaternion representing an identity rotation
+    * \sa MatrixBase::Identity()
+    */
+  inline static Quaternion<Scalar> Identity() { return Quaternion<Scalar>(1, 0, 0, 0); }
+
+  /** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
+    */
+  inline QuaternionBase& setIdentity() { coeffs() << 0, 0, 0, 1; return *this; }
+
+  /** \returns the squared norm of the quaternion's coefficients
+    * \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
+    */
+  inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
+
+  /** \returns the norm of the quaternion's coefficients
+    * \sa QuaternionBase::squaredNorm(), MatrixBase::norm()
+    */
+  inline Scalar norm() const { return coeffs().norm(); }
+
+  /** Normalizes the quaternion \c *this
+    * \sa normalized(), MatrixBase::normalize() */
+  inline void normalize() { coeffs().normalize(); }
+  /** \returns a normalized copy of \c *this
+    * \sa normalize(), MatrixBase::normalized() */
+  inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
+
+    /** \returns the dot product of \c *this and \a other
+    * Geometrically speaking, the dot product of two unit quaternions
+    * corresponds to the cosine of half the angle between the two rotations.
+    * \sa angularDistance()
+    */
+  template<class OtherDerived> inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
+
+  template<class OtherDerived> inline Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
+
+  Matrix3 toRotationMatrix() const;
+
+  template<typename Derived1, typename Derived2>
+  Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+
+  template<class OtherDerived> inline Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
+  template<class OtherDerived> inline Derived& operator*= (const QuaternionBase<OtherDerived>& q);
+
+  Quaternion<Scalar> inverse() const;
+  Quaternion<Scalar> conjugate() const;
+
+  template<class OtherDerived> Quaternion<Scalar> slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const;
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  template<class OtherDerived>
+  bool isApprox(const QuaternionBase<OtherDerived>& other, RealScalar prec = precision<Scalar>()) const
+  { return coeffs().isApprox(other.coeffs(), prec); }
+
+  Vector3 _transformVector(Vector3 v) const;
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
+  {
+    return typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type(
+      coeffs().template cast<NewScalarType>());
+  }
+};
+
+/***************************************************************************
+* Definition/implementation of Quaternion<Scalar>
+***************************************************************************/
+
 /** \geometry_module \ingroup Geometry_Module
   *
   * \class Quaternion
@@ -48,152 +197,13 @@
   * \sa  class AngleAxis, class Transform
   */
 
-template<typename Other,
-         int OtherRows=Other::RowsAtCompileTime,
-         int OtherCols=Other::ColsAtCompileTime>
-struct ei_quaternionbase_assign_impl;
-
-template<typename Scalar> class Quaternion; // [XXX] => remove when Quaternion becomes Quaternion
-
-template<typename Derived>
-struct ei_traits<QuaternionBase<Derived> >
-{
-  typedef typename ei_traits<Derived>::Scalar Scalar;
-  enum {
-    PacketAccess = ei_traits<Derived>::PacketAccess
-  };
-};
-
-template<class Derived>
-class QuaternionBase : public RotationBase<Derived, 3>
-{
-  typedef RotationBase<Derived, 3> Base;
-public:
-  using Base::operator*;
-
-  typedef typename ei_traits<QuaternionBase<Derived> >::Scalar Scalar;
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-
- // typedef typename Matrix<Scalar,4,1> Coefficients;
-  /** the type of a 3D vector */
-  typedef Matrix<Scalar,3,1> Vector3;
-  /** the equivalent rotation matrix type */
-  typedef Matrix<Scalar,3,3> Matrix3;
-  /** the equivalent angle-axis type */
-  typedef AngleAxis<Scalar> AngleAxisType;
-
-  /** \returns the \c x coefficient */
-  inline Scalar x() const { return this->derived().coeffs().coeff(0); }
-  /** \returns the \c y coefficient */
-  inline Scalar y() const { return this->derived().coeffs().coeff(1); }
-  /** \returns the \c z coefficient */
-  inline Scalar z() const { return this->derived().coeffs().coeff(2); }
-  /** \returns the \c w coefficient */
-  inline Scalar w() const { return this->derived().coeffs().coeff(3); }
-
-  /** \returns a reference to the \c x coefficient */
-  inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
-  /** \returns a reference to the \c y coefficient */
-  inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
-  /** \returns a reference to the \c z coefficient */
-  inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
-  /** \returns a reference to the \c w coefficient */
-  inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
-
-  /** \returns a read-only vector expression of the imaginary part (x,y,z) */
-  inline const VectorBlock<typename ei_traits<Derived>::Coefficients,3> vec() const { return this->derived().coeffs().template start<3>(); }
-
-  /** \returns a vector expression of the imaginary part (x,y,z) */
-  inline VectorBlock<typename ei_traits<Derived>::Coefficients,3> vec() { return this->derived().coeffs().template start<3>(); }
-
-  /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
-  inline const typename ei_traits<Derived>::Coefficients& coeffs() const { return this->derived().coeffs(); }
-
-  /** \returns a vector expression of the coefficients (x,y,z,w) */
-  inline typename ei_traits<Derived>::Coefficients& coeffs() { return this->derived().coeffs(); }
-
-  template<class OtherDerived> QuaternionBase& operator=(const QuaternionBase<OtherDerived>& other);
-  QuaternionBase& operator=(const AngleAxisType& aa);
-  template<class OtherDerived>
-  QuaternionBase& operator=(const MatrixBase<OtherDerived>& m);
-
-  /** \returns a quaternion representing an identity rotation
-    * \sa MatrixBase::Identity()
-    */
-  inline static Quaternion<Scalar> Identity() { return Quaternion<Scalar>(1, 0, 0, 0); }
-
-  /** \sa Quaternion2::Identity(), MatrixBase::setIdentity()
-    */
-  inline QuaternionBase& setIdentity() { coeffs() << 0, 0, 0, 1; return *this; }
-
-  /** \returns the squared norm of the quaternion's coefficients
-    * \sa Quaternion2::norm(), MatrixBase::squaredNorm()
-    */
-  inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
-
-  /** \returns the norm of the quaternion's coefficients
-    * \sa Quaternion2::squaredNorm(), MatrixBase::norm()
-    */
-  inline Scalar norm() const { return coeffs().norm(); }
-
-  /** Normalizes the quaternion \c *this
-    * \sa normalized(), MatrixBase::normalize() */
-  inline void normalize() { coeffs().normalize(); }
-  /** \returns a normalized version of \c *this
-    * \sa normalize(), MatrixBase::normalized() */
-  inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
-
-    /** \returns the dot product of \c *this and \a other
-    * Geometrically speaking, the dot product of two unit quaternions
-    * corresponds to the cosine of half the angle between the two rotations.
-    * \sa angularDistance()
-    */
-  template<class OtherDerived> inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
-
-  template<class OtherDerived> inline Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
-
-  Matrix3 toRotationMatrix(void) const;
-
-  template<typename Derived1, typename Derived2>
-  QuaternionBase& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
-
-  template<class OtherDerived> inline Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
-  template<class OtherDerived> inline QuaternionBase& operator*= (const QuaternionBase<OtherDerived>& q);
-
-  Quaternion<Scalar> inverse(void) const;
-  Quaternion<Scalar> conjugate(void) const;
-
-  template<class OtherDerived> Quaternion<Scalar> slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const;
-
-  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
-    * determined by \a prec.
-    *
-    * \sa MatrixBase::isApprox() */
-  bool isApprox(const QuaternionBase& other, RealScalar prec = precision<Scalar>()) const
-  { return coeffs().isApprox(other.coeffs(), prec); }
-
-  Vector3 _transformVector(Vector3 v) const;
-
-  /** \returns \c *this with scalar type casted to \a NewScalarType
-    *
-    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
-    * then this function smartly returns a const reference to \c *this.
-    */
-  template<typename NewScalarType>
-  inline typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
-  {
-    return typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type(
-      coeffs().template cast<NewScalarType>());
-  }
-};
-
 template<typename _Scalar>
 struct ei_traits<Quaternion<_Scalar> >
 {
   typedef _Scalar Scalar;
   typedef Matrix<_Scalar,4,1> Coefficients;
   enum{
-    PacketAccess = Aligned 
+    PacketAccess = Aligned
   };
 };
 
@@ -239,7 +249,7 @@ public:
   explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
 
   /** Copy constructor with scalar type conversion */
-  template<class Derived>
+  template<typename Derived>
   inline explicit Quaternion(const QuaternionBase<Derived>& other)
   { m_coeffs = other.coeffs().template cast<Scalar>(); }
 
@@ -250,16 +260,29 @@ protected:
   Coefficients m_coeffs;
 };
 
-/* ########### Map<Quaternion> */
+/** \ingroup Geometry_Module
+  * single precision quaternion type */
+typedef Quaternion<float> Quaternionf;
+/** \ingroup Geometry_Module
+  * double precision quaternion type */
+typedef Quaternion<double> Quaterniond;
+
+/***************************************************************************
+* Specialization of Map<Quaternion<Scalar>>
+***************************************************************************/
 
 /** \class Map<Quaternion>
   * \nonstableyet
   *
-  * \brief Expression of a quaternion
+  * \brief Expression of a quaternion from a memory buffer
   *
-  * \param Scalar the type of the vector of diagonal coefficients
+  * \param _Scalar the type of the Quaternion coefficients
+  * \param PacketAccess see class Map
   *
-  * \sa class Quaternion, class QuaternionBase
+  * This is a specialization of class Map for Quaternion. This class allows to view
+  * a 4 scalar memory buffer as an Eigen's  Quaternion object.
+  *
+  * \sa class Map, class Quaternion, class QuaternionBase
   */
 template<typename _Scalar, int _PacketAccess>
 struct ei_traits<Map<Quaternion<_Scalar>, _PacketAccess> >:
@@ -273,15 +296,23 @@ ei_traits<Quaternion<_Scalar> >
 };
 
 template<typename _Scalar, int PacketAccess>
-class Map<Quaternion<_Scalar>, PacketAccess > : public QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> >, ei_no_assignment_operator {
+class Map<Quaternion<_Scalar>, PacketAccess >
+  : public QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> >,
+    ei_no_assignment_operator
+{
   public:
-    
+
     typedef _Scalar Scalar;
+    typedef typename ei_traits<Map>::Coefficients Coefficients;
 
-    typedef typename ei_traits<Map<Quaternion<Scalar>, PacketAccess> >::Coefficients Coefficients;
+    /** Constructs a Mapped Quaternion object from the pointer \a coeffs
+      *
+      * The pointer \a coeffs must reference the four coeffecients of Quaternion in the following order:
+      * \code *coeffs == {x, y, z, w} \endcode
+      *
+      * If the template paramter PacketAccess is set to Aligned, then the pointer coeffs must be aligned. */
+    inline Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
 
-    inline Map<Quaternion<Scalar>, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {}
-    
     inline Coefficients& coeffs() { return m_coeffs;}
     inline const Coefficients& coeffs() const { return m_coeffs;}
 
@@ -289,15 +320,20 @@ class Map<Quaternion<_Scalar>, PacketAccess > : public QuaternionBase<Map<Quater
     Coefficients m_coeffs;
 };
 
-typedef Map<Quaternion<double> > QuaternionMapd;
-typedef Map<Quaternion<float> > QuaternionMapf;
-typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
-typedef Map<Quaternion<float>, Aligned> QuaternionMapAlignedf;
+typedef Map<Quaternion<double> >          QuaternionMapd;
+typedef Map<Quaternion<float> >           QuaternionMapf;
+typedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;
+typedef Map<Quaternion<float>, Aligned>   QuaternionMapAlignedf;
+
+/***************************************************************************
+* Implementation of QuaternionBase methods
+***************************************************************************/
 
 // Generic Quaternion * Quaternion product
-template<int Arch, class Derived, class OtherDerived, typename Scalar, int PacketAccess> struct ei_quat_product
+// This product can be specialized for a given architecture via the Arch template argument.
+template<int Arch, class Derived1, class Derived2, typename Scalar, int PacketAccess> struct ei_quat_product
 {
-  inline static Quaternion<Scalar> run(const QuaternionBase<Derived>& a, const QuaternionBase<OtherDerived>& b){
+  inline static Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
     return Quaternion<Scalar>
     (
       a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
@@ -311,21 +347,22 @@ template<int Arch, class Derived, class OtherDerived, typename Scalar, int Packe
 /** \returns the concatenation of two rotations as a quaternion-quaternion product */
 template <class Derived>
 template <class OtherDerived>
-inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
+inline Quaternion<typename ei_traits<Derived>::Scalar>
+QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
 {
   EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-   return ei_quat_product<EiArch, Derived, OtherDerived, 
-                          typename ei_traits<Derived>::Scalar,
-                          ei_traits<Derived>::PacketAccess && ei_traits<OtherDerived>::PacketAccess>::run(*this, other);
+  return ei_quat_product<EiArch, Derived, OtherDerived,
+                         typename ei_traits<Derived>::Scalar,
+                         ei_traits<Derived>::PacketAccess && ei_traits<OtherDerived>::PacketAccess>::run(*this, other);
 }
 
 /** \sa operator*(Quaternion) */
 template <class Derived>
 template <class OtherDerived>
-inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
+inline Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
 {
-  return (*this = *this * other);
+  return (derived() = derived() * other.derived());
 }
 
 /** Rotation of a vector by a quaternion.
@@ -350,21 +387,21 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
 
 template<class Derived>
 template<class OtherDerived>
-inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
+inline Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
 {
   coeffs() = other.coeffs();
-  return *this;
+  return derived();
 }
 
 /** Set \c *this from an angle-axis \a aa and returns a reference to \c *this
   */
 template<class Derived>
-inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
+inline Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
 {
   Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
   this->w() = ei_cos(ha);
   this->vec() = ei_sin(ha) * aa.axis();
-  return *this;
+  return derived();
 }
 
 /** Set \c *this from the expression \a xpr:
@@ -375,12 +412,12 @@ inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const AngleAx
 
 template<class Derived>
 template<class MatrixDerived>
-inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
+inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
 {
   EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename MatrixDerived::Scalar>::ret),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
   ei_quaternionbase_assign_impl<MatrixDerived>::run(*this, xpr.derived());
-  return *this;
+  return derived();
 }
 
 /** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to
@@ -434,7 +471,7 @@ QuaternionBase<Derived>::toRotationMatrix(void) const
   */
 template<class Derived>
 template<typename Derived1, typename Derived2>
-inline QuaternionBase<Derived>& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
 {
   Vector3 v0 = a.normalized();
   Vector3 v1 = b.normalized();
@@ -458,7 +495,7 @@ inline QuaternionBase<Derived>& QuaternionBase<Derived>::setFromTwoVectors(const
     Scalar w2 = (Scalar(1)+c)*Scalar(0.5);
     this->w() = ei_sqrt(w2);
     this->vec() = axis * ei_sqrt(Scalar(1) - w2);
-    return *this;
+    return derived();
   }
   Vector3 axis = v0.cross(v1);
   Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2));
@@ -466,17 +503,17 @@ inline QuaternionBase<Derived>& QuaternionBase<Derived>::setFromTwoVectors(const
   this->vec() = axis * invs;
   this->w() = s * Scalar(0.5);
 
-  return *this;
+  return derived();
 }
 
 /** \returns the multiplicative inverse of \c *this
   * Note that in most cases, i.e., if you simply want the opposite rotation,
   * and/or the quaternion is normalized, then it is enough to use the conjugate.
   *
-  * \sa Quaternion2::conjugate()
+  * \sa QuaternionBase::conjugate()
   */
 template <class Derived>
-inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::inverse() const
+inline Quaternion<typename ei_traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
 {
   // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
   Scalar n2 = this->squaredNorm();
@@ -485,7 +522,7 @@ inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> Quaterni
   else
   {
     // return an invalid result to flag the error
-    return Quaternion<Scalar>(ei_traits<Derived>::Coefficients::Zero());
+    return Quaternion<Scalar>(Coefficients::Zero());
   }
 }
 
@@ -496,7 +533,8 @@ inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> Quaterni
   * \sa Quaternion2::inverse()
   */
 template <class Derived>
-inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::conjugate() const
+inline Quaternion<typename ei_traits<Derived>::Scalar>
+QuaternionBase<Derived>::conjugate() const
 {
   return Quaternion<Scalar>(this->w(),-this->x(),-this->y(),-this->z());
 }
@@ -506,11 +544,12 @@ inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> Quaterni
   */
 template <class Derived>
 template <class OtherDerived>
-inline typename ei_traits<QuaternionBase<Derived> >::Scalar QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
+inline typename ei_traits<Derived>::Scalar
+QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
 {
   double d = ei_abs(this->dot(other));
   if (d>=1.0)
-    return 0;
+    return Scalar(0);
   return Scalar(2) * std::acos(d);
 }
 
@@ -519,13 +558,14 @@ inline typename ei_traits<QuaternionBase<Derived> >::Scalar QuaternionBase<Deriv
   */
 template <class Derived>
 template <class OtherDerived>
-Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const
+Quaternion<typename ei_traits<Derived>::Scalar>
+QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const
 {
   static const Scalar one = Scalar(1) - precision<Scalar>();
   Scalar d = this->dot(other);
   Scalar absD = ei_abs(d);
   if (absD>=one)
-    return Quaternion<Scalar>(*this);
+    return Quaternion<Scalar>(derived());
 
   // theta is the angle between the 2 quaternions
   Scalar theta = std::acos(absD);
@@ -549,7 +589,7 @@ struct ei_quaternionbase_assign_impl<Other,3,3>
     // This algorithm comes from  "Quaternion Calculus and Fast Animation",
     // Ken Shoemake, 1987 SIGGRAPH course notes
     Scalar t = mat.trace();
-    if (t > 0)
+    if (t > Scalar(0))
     {
       t = ei_sqrt(t + Scalar(1.0));
       q.w() = Scalar(0.5)*t;
diff --git a/Eigen/src/SVD/SVD.h b/Eigen/src/SVD/SVD.h
index 3b424be04..254885873 100644
--- a/Eigen/src/SVD/SVD.h
+++ b/Eigen/src/SVD/SVD.h
@@ -436,14 +436,13 @@ struct ei_solve_retval<SVD<_MatrixType>, Rhs>
 
   template<typename Dest> void evalTo(Dest& dst) const
   {
-    const int cols = this->cols();
     ei_assert(rhs().rows() == dec().rows());
 
-    for (int j=0; j<cols; ++j)
+    for (int j=0; j<cols(); ++j)
     {
       Matrix<Scalar,MatrixType::RowsAtCompileTime,1> aux = dec().matrixU().adjoint() * rhs().col(j);
 
-      for (int i = 0; i <dec().rows(); ++i)
+      for (int i = 0; i < dec().rows(); ++i)
       {
         Scalar si = dec().singularValues().coeff(i);
         if(si == RealScalar(0))
@@ -451,8 +450,10 @@ struct ei_solve_retval<SVD<_MatrixType>, Rhs>
         else
           aux.coeffRef(i) /= si;
       }
-
-      dst.col(j) = dec().matrixV() * aux;
+      const int minsize = std::min(dec().rows(),dec().cols());
+      dst.col(j).start(minsize) = aux.start(minsize);
+      if(dec().cols()>dec().rows()) dst.col(j).end(cols()-minsize).setZero();
+      dst.col(j) = dec().matrixV() * dst.col(j);
     }
   }
 };
diff --git a/Eigen/src/Sparse/CholmodSupport.h b/Eigen/src/Sparse/CholmodSupport.h
index 30a33c3dc..f02374d7c 100644
--- a/Eigen/src/Sparse/CholmodSupport.h
+++ b/Eigen/src/Sparse/CholmodSupport.h
@@ -126,6 +126,7 @@ class SparseLLT<MatrixType,Cholmod> : public SparseLLT<MatrixType>
     typedef SparseLLT<MatrixType> Base;
     typedef typename Base::Scalar Scalar;
     typedef typename Base::RealScalar RealScalar;
+    typedef typename Base::CholMatrixType CholMatrixType;
     using Base::MatrixLIsDirty;
     using Base::SupernodalFactorIsDirty;
     using Base::m_flags;
@@ -154,7 +155,7 @@ class SparseLLT<MatrixType,Cholmod> : public SparseLLT<MatrixType>
       cholmod_finish(&m_cholmod);
     }
 
-    inline const typename Base::CholMatrixType& matrixL(void) const;
+    inline const CholMatrixType& matrixL() const;
 
     template<typename Derived>
     bool solveInPlace(MatrixBase<Derived> &b) const;
@@ -198,7 +199,7 @@ void SparseLLT<MatrixType,Cholmod>::compute(const MatrixType& a)
 }
 
 template<typename MatrixType>
-inline const typename SparseLLT<MatrixType>::CholMatrixType&
+inline const typename SparseLLT<MatrixType,Cholmod>::CholMatrixType&
 SparseLLT<MatrixType,Cholmod>::matrixL() const
 {
   if (m_status & MatrixLIsDirty)
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index c1f473597..70173427f 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -27,7 +27,7 @@
 #define EIGEN_BENCH_TIMER_H
 
 #ifndef WIN32
-#include <sys/time.h>
+#include <time.h>
 #include <unistd.h>
 #else
 #define NOMINMAX
@@ -41,6 +41,11 @@ namespace Eigen
 {
 
 /** Elapsed time timer keeping the best try.
+  *
+  * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
+  * On Windows we use QueryPerformanceCounter
+  *
+  * Important: on linux, you must link with -lrt
   */
 class BenchTimer
 {
@@ -83,10 +88,9 @@ public:
     QueryPerformanceCounter(&query_ticks);
     return query_ticks.QuadPart/m_frequency;
 #else
-      struct timeval tv;
-      struct timezone tz;
-      gettimeofday(&tv, &tz);
-      return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
+    timespec ts;
+    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
 #endif
   }
 
diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp
index 25458f823..a4a9e780a 100644
--- a/bench/benchBlasGemm.cpp
+++ b/bench/benchBlasGemm.cpp
@@ -178,13 +178,13 @@ using namespace Eigen;
 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
 {
   for (uint j=0 ; j<nbloops ; ++j)
-      mc += (ma * mb).lazy();
+      mc.noalias() += ma * mb;
 }
 
 void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
 {
   for (uint j=0 ; j<nbloops ; ++j)
-    mc += Product<MyMatrix,MyMatrix,NormalProduct>(ma,mb).lazy();
+    mc.noalias() += GeneralProduct<MyMatrix,MyMatrix,UnrolledProduct>(ma,mb);
 }
 
 #define MYVERIFY(A,M) if (!(A)) { \
diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp
index 4b6cabb55..3104929ba 100644
--- a/bench/benchFFT.cpp
+++ b/bench/benchFFT.cpp
@@ -22,13 +22,10 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
+#include <bench/BenchUtil.h>
 #include <complex>
 #include <vector>
 #include <Eigen/Core>
-#include <bench/BenchTimer.h>
-#ifdef USE_FFTW
-#include <fftw3.h>
-#endif
 
 #include <unsupported/Eigen/FFT>
 
diff --git a/doc/C01_QuickStartGuide.dox b/doc/C01_QuickStartGuide.dox
index 2943aed80..06b2595e7 100644
--- a/doc/C01_QuickStartGuide.dox
+++ b/doc/C01_QuickStartGuide.dox
@@ -278,18 +278,24 @@ Of course, fixed-size matrices can't be resized.
 
 
 \subsection TutorialMap Map
-Any memory buffer can be mapped as an Eigen expression:
-<table class="tutorial_code"><tr><td>
+Any memory buffer can be mapped as an Eigen expression using the Map() static method:
 \code
 std::vector<float> stlarray(10);
-Map<VectorXf>(&stlarray[0], stlarray.size()).setOnes();
-int data[4] = 1, 2, 3, 4;
-Matrix2i mat2x2(data);
-MatrixXi mat2x2 = Map<Matrix2i>(data);
-MatrixXi mat2x2 = Map<MatrixXi>(data,2,2);
+VectorXf::Map(&stlarray[0], stlarray.size()).squaredNorm();
+\endcode
+Here VectorXf::Map returns an object of class Map<VectorXf>, which behaves like a VectorXf except that it uses the existing array. You can write to this object, that will write to the existing array. You can also construct a named obtect to reuse it:
+\code
+float array[rows*cols];
+Map<MatrixXf> m(array,rows,cols);
+m = othermatrix1 * othermatrix2;
+m.eigenvalues();
+\endcode
+In the fixed-size case, no need to pass sizes:
+\code
+float array[9];
+Map<Matrix3d> m(array);
+Matrix3d::Map(array).setIdentity();
 \endcode
-</td></tr></table>
-
 
 
 \subsection TutorialCommaInit Comma initializer
diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp
index 86e1b200a..756527d57 100644
--- a/test/eigensolver_complex.cpp
+++ b/test/eigensolver_complex.cpp
@@ -49,6 +49,10 @@ template<typename MatrixType> void eigensolver(const MatrixType& m)
   ComplexEigenSolver<MatrixType> ei1(a);
   VERIFY_IS_APPROX(a * ei1.eigenvectors(), ei1.eigenvectors() * ei1.eigenvalues().asDiagonal());
 
+  // Regression test for issue #66
+  MatrixType z = MatrixType::Zero(rows,cols);
+  ComplexEigenSolver<MatrixType> eiz(z);
+  VERIFY((eiz.eigenvalues().cwise()==0).all());
 }
 
 void test_eigensolver_complex()
@@ -58,4 +62,3 @@ void test_eigensolver_complex()
     CALL_SUBTEST_2( eigensolver(MatrixXcd(14,14)) );
   }
 }
-
diff --git a/test/sparse_solvers.cpp b/test/sparse_solvers.cpp
index 6c1bb9b33..a530a9515 100644
--- a/test/sparse_solvers.cpp
+++ b/test/sparse_solvers.cpp
@@ -109,7 +109,7 @@ template<typename Scalar> void sparse_solvers(int rows, int cols)
 
     initSPD(density, refMat2, m2);
 
-    refMat2.llt().solve(b, &refX);
+    refX = refMat2.llt().solve(b);
     typedef SparseMatrix<Scalar,LowerTriangular|SelfAdjoint> SparseSelfAdjointMatrix;
     if (!NumTraits<Scalar>::IsComplex)
     {
@@ -152,7 +152,7 @@ template<typename Scalar> void sparse_solvers(int rows, int cols)
     refMat2 += refMat2.adjoint();
     refMat2.diagonal() *= 0.5;
 
-    refMat2.llt().solve(b, &refX); // FIXME use LLT to compute the reference because LDLT seems to fail with large matrices
+    refX = refMat2.llt().solve(b); // FIXME use LLT to compute the reference because LDLT seems to fail with large matrices
     typedef SparseMatrix<Scalar,UpperTriangular|SelfAdjoint> SparseSelfAdjointMatrix;
     x = b;
     SparseLDLT<SparseSelfAdjointMatrix> ldlt(m2);
diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex
index e1c41ab38..04228c95a 100644
--- a/unsupported/Eigen/Complex
+++ b/unsupported/Eigen/Complex
@@ -33,14 +33,26 @@
 
 namespace Eigen {
 
-template <typename _NativePtr,typename _PunnedPtr>
+template <typename _NativeData,typename _PunnedData>
 struct castable_pointer
 {
-    castable_pointer(_NativePtr ptr) : _ptr(ptr) {}
-    operator _NativePtr ()  {return _ptr;}
-    operator _PunnedPtr ()  {return reinterpret_cast<_PunnedPtr>(_ptr);}
+    castable_pointer(_NativeData * ptr) : _ptr(ptr) { }
+    operator _NativeData * ()  {return _ptr;}
+    operator _PunnedData * ()  {return reinterpret_cast<_PunnedData*>(_ptr);}
+    operator const _NativeData * () const {return _ptr;}
+    operator const _PunnedData * () const {return reinterpret_cast<_PunnedData*>(_ptr);}
     private: 
-    _NativePtr _ptr;
+    _NativeData *  _ptr;
+};
+
+template <typename _NativeData,typename _PunnedData>
+struct const_castable_pointer
+{
+    const_castable_pointer(_NativeData * ptr) : _ptr(ptr) { }
+    operator const _NativeData * () const {return _ptr;}
+    operator const _PunnedData * () const {return reinterpret_cast<_PunnedData*>(_ptr);}
+    private: 
+    _NativeData * _ptr;
 };
 
 template <typename T>
@@ -50,7 +62,8 @@ struct Complex
     typedef T value_type;
 
     // constructors
-    Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { }
+    Complex() {}
+    Complex(const T& re, const T& im = T()) : _re(re),_im(im) { }
     Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {}
 
     template<class X> 
@@ -58,40 +71,63 @@ struct Complex
     template<class X> 
     Complex(const std::complex<X>&other): _re(other.real()) ,_im(other.imag()) {}
 
-
     // allow binary access to the object as a std::complex
-    typedef castable_pointer< Complex<T>*, StandardComplex* > pointer_type;
-    typedef castable_pointer< const Complex<T>*, const StandardComplex* > const_pointer_type;
+    typedef castable_pointer< Complex<T>, StandardComplex > pointer_type;
+    typedef const_castable_pointer< Complex<T>, StandardComplex > const_pointer_type;
+
+    inline
     pointer_type operator & () {return pointer_type(this);}
+
+    inline
     const_pointer_type operator & () const {return const_pointer_type(this);}
 
+    inline
     operator StandardComplex () const {return std_type();}
+    inline
     operator StandardComplex & () {return std_type();}
 
-    StandardComplex std_type() const {return StandardComplex(real(),imag());}
+    inline
+    const StandardComplex & std_type() const {return *reinterpret_cast<const StandardComplex*>(this);}
+
+    inline
     StandardComplex & std_type() {return *reinterpret_cast<StandardComplex*>(this);}
 
 
     // every sort of accessor and mutator that has ever been in fashion.
     // For a brief history, search for "std::complex over-encapsulated"
     // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#387
+    inline
     const T & real() const {return _re;}
+    inline
     const T & imag() const {return _im;}
+    inline
     T & real() {return _re;}
+    inline
     T & imag() {return _im;}
+    inline
     T & real(const T & x) {return _re=x;}
+    inline
     T & imag(const T & x) {return _im=x;}
+    inline
     void set_real(const T & x) {_re = x;}
+    inline
     void set_imag(const T & x) {_im = x;}
 
     // *** complex member functions: ***
+    inline
     Complex<T>& operator= (const T& val) { _re=val;_im=0;return *this;  }
+    inline
     Complex<T>& operator+= (const T& val) {_re+=val;return *this;}
+    inline
     Complex<T>& operator-= (const T& val) {_re-=val;return *this;}
+    inline
     Complex<T>& operator*= (const T& val) {_re*=val;_im*=val;return *this;  }
+    inline
     Complex<T>& operator/= (const T& val) {_re/=val;_im/=val;return *this;  }
 
+    inline
     Complex& operator= (const Complex& rhs) {_re=rhs._re;_im=rhs._im;return *this;}
+    inline
     Complex& operator= (const StandardComplex& rhs) {_re=rhs.real();_im=rhs.imag();return *this;}
 
     template<class X> Complex<T>& operator= (const Complex<X>& rhs) { _re=rhs._re;_im=rhs._im;return *this;}
@@ -105,8 +141,7 @@ struct Complex
     T _im;
 };
 
-template <typename T>
-T ei_to_std( const T & x) {return x;}
+//template <typename T> T ei_to_std( const T & x) {return x;}
 
 template <typename T>
 std::complex<T> ei_to_std( const Complex<T> & x) {return x.std_type();}
@@ -165,7 +200,7 @@ operator<< (std::basic_ostream<charT,traits>& ostr, const Complex<T>& rhs)
  template<class T> Complex<T> log (const  Complex<T>&x){return log(ei_to_std(x));}
  template<class T> Complex<T> log10 (const  Complex<T>&x){return log10(ei_to_std(x));}
 
- template<class T> Complex<T> pow(const Complex<T>&x, int p) {return pow(ei_to_std(x),ei_to_std(p));}
+ template<class T> Complex<T> pow(const Complex<T>&x, int p) {return pow(ei_to_std(x),p);}
  template<class T> Complex<T> pow(const Complex<T>&x, const T&p) {return pow(ei_to_std(x),ei_to_std(p));}
  template<class T> Complex<T> pow(const Complex<T>&x, const Complex<T>&p) {return pow(ei_to_std(x),ei_to_std(p));}
  template<class T> Complex<T> pow(const T&x, const Complex<T>&p) {return pow(ei_to_std(x),ei_to_std(p));}
@@ -175,8 +210,20 @@ operator<< (std::basic_ostream<charT,traits>& ostr, const Complex<T>& rhs)
  template<class T> Complex<T> sqrt (const  Complex<T>&x){return sqrt(ei_to_std(x));}
  template<class T> Complex<T> tan (const  Complex<T>&x){return tan(ei_to_std(x));}
  template<class T> Complex<T> tanh (const  Complex<T>&x){return tanh(ei_to_std(x));}
-}
 
+  template<typename _Real> struct NumTraits<Complex<_Real> >
+  {
+    typedef _Real Real;
+    typedef Complex<_Real> FloatingPoint;
+    enum {
+      IsComplex = 1,
+      HasFloatingPoint = NumTraits<Real>::HasFloatingPoint,
+      ReadCost = 2,
+      AddCost = 2 * NumTraits<Real>::AddCost,
+      MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
+    };
+  };
+}
 #endif
 /* vim: set filetype=cpp et sw=2 ts=2 ai: */
 
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 36afdde8d..8f7a2fcae 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -28,6 +28,7 @@
 #include <complex>
 #include <vector>
 #include <map>
+#include <Eigen/Core>
 
 #ifdef EIGEN_FFTW_DEFAULT
 // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
@@ -65,49 +66,87 @@ class FFT
     typedef typename impl_type::Scalar Scalar;
     typedef typename impl_type::Complex Complex;
 
-    FFT(const impl_type & impl=impl_type() ) :m_impl(impl) { }
+    enum Flag {
+      Default=0, // goof proof
+      Unscaled=1,
+      HalfSpectrum=2,
+      // SomeOtherSpeedOptimization=4
+      Speedy=32767
+    };
 
-    template <typename _Input>
-    void fwd( Complex * dst, const _Input * src, int nfft)
+    FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) { }
+
+    inline
+    bool HasFlag(Flag f) const { return (m_flag & (int)f) == f;}
+
+    inline
+    void SetFlag(Flag f) { m_flag |= (int)f;}
+
+    inline
+    void ClearFlag(Flag f) { m_flag &= (~(int)f);}
+
+    inline
+    void fwd( Complex * dst, const Scalar * src, int nfft)
+    {
+        m_impl.fwd(dst,src,nfft);
+        if ( HasFlag(HalfSpectrum) == false)
+          ReflectSpectrum(dst,nfft);
+    }
+
+    inline
+    void fwd( Complex * dst, const Complex * src, int nfft)
     {
         m_impl.fwd(dst,src,nfft);
     }
 
     template <typename _Input>
+    inline
     void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src) 
     {
-        dst.resize( src.size() );
-        fwd( &dst[0],&src[0],src.size() );
+      if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) )
+        dst.resize( (src.size()>>1)+1);
+      else
+        dst.resize(src.size());
+      fwd(&dst[0],&src[0],src.size());
     }
 
     template<typename InputDerived, typename ComplexDerived>
+    inline
     void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src)
     {
-        EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
-        EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
-        EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time
-        EIGEN_STATIC_ASSERT((ei_is_same_type<typename ComplexDerived::Scalar, Complex>::ret),
-                            YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-        EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
-                            THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
-        dst.derived().resize( src.size() );
-        fwd( &dst[0],&src[0],src.size() );
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
+      EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time
+      EIGEN_STATIC_ASSERT((ei_is_same_type<typename ComplexDerived::Scalar, Complex>::ret),
+            YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+      EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
+            THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
+
+      if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) )
+        dst.derived().resize( (src.size()>>1)+1);
+      else
+        dst.derived().resize(src.size());
+      fwd( &dst[0],&src[0],src.size() );
     }
 
-    template <typename _Output>
-    void inv( _Output * dst, const Complex * src, int nfft)
+    inline
+    void inv( Complex * dst, const Complex * src, int nfft)
     {
         m_impl.inv( dst,src,nfft );
+        if ( HasFlag( Unscaled ) == false)
+          scale(dst,1./nfft,nfft);
     }
 
-    template <typename _Output>
-    void inv( std::vector<_Output> & dst, const std::vector<Complex> & src) 
+    inline
+    void inv( Scalar * dst, const Complex * src, int nfft)
     {
-        dst.resize( src.size() );
-        inv( &dst[0],&src[0],src.size() );
+        m_impl.inv( dst,src,nfft );
+        if ( HasFlag( Unscaled ) == false)
+          scale(dst,1./nfft,nfft);
     }
 
     template<typename OutputDerived, typename ComplexDerived>
+    inline
     void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src)
     {
         EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
@@ -117,18 +156,52 @@ class FFT
                             YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
         EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
                             THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
-        dst.derived().resize( src.size() );
+
+        int nfft = src.size();
+        int nout = HasFlag(HalfSpectrum) ? ((nfft>>1)+1) : nfft;
+        dst.derived().resize( nout );
         inv( &dst[0],&src[0],src.size() );
     }
 
+    template <typename _Output>
+    inline
+    void inv( std::vector<_Output> & dst, const std::vector<Complex> & src)
+    {
+      if ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) )
+        dst.resize( 2*(src.size()-1) );
+      else
+        dst.resize( src.size() );
+      inv( &dst[0],&src[0],dst.size() );
+    }
+
     // TODO: multi-dimensional FFTs
     
     // TODO: handle Eigen MatrixBase
      // ---> i added fwd and inv specializations above + unit test, is this enough? (bjacob)
 
+    inline
     impl_type & impl() {return m_impl;}
   private:
+
+    template <typename _It,typename _Val>
+    inline
+    void scale(_It x,_Val s,int nx)
+    {
+      for (int k=0;k<nx;++k)
+        *x++ *= s;
+    }
+
+    inline
+    void ReflectSpectrum(Complex * freq,int nfft)
+    {
+      // create the implicit right-half spectrum (conjugate-mirror of the left-half)
+      int nhbins=(nfft>>1)+1;
+      for (int k=nhbins;k < nfft; ++k )
+        freq[k] = conj(freq[nfft-k]);
+    }
+
     impl_type m_impl;
+    int m_flag;
 };
 }
 #endif
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 2fb733a99..c4607c2b8 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -29,7 +29,7 @@ namespace Eigen {
 
 template<typename A, typename B>
 struct ei_make_coherent_impl {
-  static void run(A& a, B& b) {}
+  static void run(A&, B&) {}
 };
 
 // resize a to match b is a.size()==0, and conversely.
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h b/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h
index 69ea9144e..03c82b7e8 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h
@@ -35,7 +35,7 @@ namespace Eigen {
   * This class represents a scalar value while tracking its respective derivatives.
   *
   * It supports the following list of global math function:
-  *  - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, 
+  *  - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos,
   *  - ei_abs, ei_sqrt, ei_pow, ei_exp, ei_log, ei_sin, ei_cos,
   *  - ei_conj, ei_real, ei_imag, ei_abs2.
   *
@@ -48,130 +48,150 @@ template<typename ValueType, typename JacobianType>
 class AutoDiffVector
 {
   public:
-    typedef typename ei_traits<ValueType>::Scalar Scalar;
-    
+    //typedef typename ei_traits<ValueType>::Scalar Scalar;
+    typedef typename ei_traits<ValueType>::Scalar BaseScalar;
+    typedef AutoDiffScalar<Matrix<BaseScalar,JacobianType::RowsAtCompileTime,1> > ActiveScalar;
+    typedef ActiveScalar Scalar;
+    typedef AutoDiffScalar<typename JacobianType::ColXpr> CoeffType;
+
     inline AutoDiffVector() {}
-    
+
     inline AutoDiffVector(const ValueType& values)
       : m_values(values)
     {
       m_jacobian.setZero();
     }
-    
+
+
+    CoeffType operator[] (int i) { return CoeffType(m_values[i], m_jacobian.col(i)); }
+    const CoeffType operator[] (int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }
+
+    CoeffType operator() (int i) { return CoeffType(m_values[i], m_jacobian.col(i)); }
+    const CoeffType operator() (int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }
+
+    CoeffType coeffRef(int i) { return CoeffType(m_values[i], m_jacobian.col(i)); }
+    const CoeffType coeffRef(int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }
+
+    int size() const { return m_values.size(); }
+
+    // FIXME here we could return an expression of the sum
+    Scalar sum() const { /*std::cerr << "sum \n\n";*/ /*std::cerr << m_jacobian.rowwise().sum() << "\n\n";*/ return Scalar(m_values.sum(), m_jacobian.rowwise().sum()); }
+
+
     inline AutoDiffVector(const ValueType& values, const JacobianType& jac)
       : m_values(values), m_jacobian(jac)
     {}
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
     inline AutoDiffVector(const AutoDiffVector<OtherValueType, OtherJacobianType>& other)
       : m_values(other.values()), m_jacobian(other.jacobian())
     {}
-    
+
     inline AutoDiffVector(const AutoDiffVector& other)
       : m_values(other.values()), m_jacobian(other.jacobian())
     {}
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
-    inline AutoDiffScalar& operator=(const AutoDiffVector<OtherValueType, OtherJacobianType>& other)
+    inline AutoDiffVector& operator=(const AutoDiffVector<OtherValueType, OtherJacobianType>& other)
     {
       m_values = other.values();
       m_jacobian = other.jacobian();
       return *this;
     }
-    
+
     inline AutoDiffVector& operator=(const AutoDiffVector& other)
     {
       m_values = other.values();
       m_jacobian = other.jacobian();
       return *this;
     }
-    
+
     inline const ValueType& values() const { return m_values; }
     inline ValueType& values() { return m_values; }
-    
+
     inline const JacobianType& jacobian() const { return m_jacobian; }
     inline JacobianType& jacobian() { return m_jacobian; }
-    
+
     template<typename OtherValueType,typename OtherJacobianType>
     inline const AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,JacobianType,OtherJacobianType> >
-    operator+(const AutoDiffScalar<OtherDerType>& other) const
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,ValueType,OtherValueType>::Type,
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,JacobianType,OtherJacobianType>::Type >
+    operator+(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) const
     {
       return AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,JacobianType,OtherJacobianType> >(
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,ValueType,OtherValueType>::Type,
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,JacobianType,OtherJacobianType>::Type >(
         m_values + other.values(),
         m_jacobian + other.jacobian());
     }
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
     inline AutoDiffVector&
-    operator+=(const AutoDiffVector<OtherValueType,OtherDerType>& other)
+    operator+=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other)
     {
       m_values += other.values();
       m_jacobian += other.jacobian();
       return *this;
     }
-    
+
     template<typename OtherValueType,typename OtherJacobianType>
     inline const AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,JacobianType,OtherJacobianType> >
-    operator-(const AutoDiffScalar<OtherDerType>& other) const
+      typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,ValueType,OtherValueType>::Type,
+      typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,JacobianType,OtherJacobianType>::Type >
+    operator-(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) const
     {
       return AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,JacobianType,OtherJacobianType> >(
-        m_values - other.values(),
-        m_jacobian - other.jacobian());
+        typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,ValueType,OtherValueType>::Type,
+        typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,JacobianType,OtherJacobianType>::Type >(
+          m_values - other.values(),
+          m_jacobian - other.jacobian());
     }
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
     inline AutoDiffVector&
-    operator-=(const AutoDiffVector<OtherValueType,OtherDerType>& other)
+    operator-=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other)
     {
       m_values -= other.values();
       m_jacobian -= other.jacobian();
       return *this;
     }
-    
+
     inline const AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, JacobianType> >
+      typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, ValueType>::Type,
+      typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, JacobianType>::Type >
     operator-() const
     {
       return AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, JacobianType> >(
-        -m_values,
-        -m_jacobian);
+        typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, ValueType>::Type,
+        typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, JacobianType>::Type >(
+          -m_values,
+          -m_jacobian);
     }
-    
+
     inline const AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >
-    operator*(const Scalar& other) const
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>::Type,
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType>::Type>
+    operator*(const BaseScalar& other) const
     {
       return AutoDiffVector<
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >(
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>::Type,
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType>::Type >(
           m_values * other,
-          (m_jacobian * other));
+          m_jacobian * other);
     }
-    
+
     friend inline const AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>::Type,
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType>::Type >
     operator*(const Scalar& other, const AutoDiffVector& v)
     {
       return AutoDiffVector<
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >(
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>::Type,
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType>::Type >(
           v.values() * other,
           v.jacobian() * other);
     }
-    
+
 //     template<typename OtherValueType,typename OtherJacobianType>
 //     inline const AutoDiffVector<
 //       CwiseBinaryOp<ei_scalar_multiple_op<Scalar>, ValueType, OtherValueType>
@@ -188,25 +208,25 @@ class AutoDiffVector
 //             m_values.cwise() * other.values(),
 //             (m_jacobian * other.values()).nestByValue() + (m_values * other.jacobian()).nestByValue());
 //     }
-    
+
     inline AutoDiffVector& operator*=(const Scalar& other)
     {
       m_values *= other;
       m_jacobian *= other;
       return *this;
     }
-    
+
     template<typename OtherValueType,typename OtherJacobianType>
     inline AutoDiffVector& operator*=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other)
     {
       *this = *this * other;
       return *this;
     }
-    
+
   protected:
     ValueType m_values;
     JacobianType m_jacobian;
-    
+
 };
 
 }
diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
index e1f67f334..a66b7398c 100644
--- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
@@ -166,6 +166,7 @@
         m_plans.clear();
       }
 
+      // complex-to-complex forward FFT
       inline
       void fwd( Complex * dst,const Complex *src,int nfft)
       {
@@ -177,9 +178,6 @@
       void fwd( Complex * dst,const Scalar * src,int nfft) 
       {
           get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft);
-          int nhbins=(nfft>>1)+1;
-          for (int k=nhbins;k < nfft; ++k )
-              dst[k] = conj(dst[nfft-k]);
       }
 
       // inverse complex-to-complex
@@ -187,12 +185,6 @@
       void inv(Complex * dst,const Complex  *src,int nfft)
       {
         get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
-
-        //TODO move scaling to Eigen::FFT
-        // scaling
-        Scalar s = Scalar(1.)/nfft;
-        for (int k=0;k<nfft;++k)
-          dst[k] *= s;
       }
 
       // half-complex to scalar
@@ -200,11 +192,6 @@
       void inv( Scalar * dst,const Complex * src,int nfft) 
       {
         get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
-
-        //TODO move scaling to Eigen::FFT
-        Scalar s = Scalar(1.)/nfft;
-        for (int k=0;k<nfft;++k)
-          dst[k] *= s;
       }
 
   protected:
@@ -222,3 +209,5 @@
           return m_plans[key];
       }
   };
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
+
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index c068d8765..5c958d1ec 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -27,388 +27,384 @@
   // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft
   // Copyright 2003-2009 Mark Borgerding
 
-  template <typename _Scalar>
-    struct ei_kiss_cpx_fft
+template <typename _Scalar>
+struct ei_kiss_cpx_fft
+{
+  typedef _Scalar Scalar;
+  typedef std::complex<Scalar> Complex;
+  std::vector<Complex> m_twiddles;
+  std::vector<int> m_stageRadix;
+  std::vector<int> m_stageRemainder;
+  std::vector<Complex> m_scratchBuf;
+  bool m_inverse;
+
+  inline
+    void make_twiddles(int nfft,bool inverse)
     {
-      typedef _Scalar Scalar;
-      typedef std::complex<Scalar> Complex;
-      std::vector<Complex> m_twiddles;
-      std::vector<int> m_stageRadix;
-      std::vector<int> m_stageRemainder;
-      std::vector<Complex> m_scratchBuf;
-      bool m_inverse;
+      m_inverse = inverse;
+      m_twiddles.resize(nfft);
+      Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+      for (int i=0;i<nfft;++i)
+        m_twiddles[i] = exp( Complex(0,i*phinc) );
+    }
 
-      void make_twiddles(int nfft,bool inverse)
-      {
-        m_inverse = inverse;
-        m_twiddles.resize(nfft);
-        Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
-        for (int i=0;i<nfft;++i)
-          m_twiddles[i] = exp( Complex(0,i*phinc) );
-      }
-
-      void factorize(int nfft)
-      {
-        //start factoring out 4's, then 2's, then 3,5,7,9,...
-        int n= nfft;
-        int p=4;
-        do {
-          while (n % p) {
-            switch (p) {
-              case 4: p = 2; break;
-              case 2: p = 3; break;
-              default: p += 2; break;
-            }
-            if (p*p>n)
-              p=n;// impossible to have a factor > sqrt(n)
-          }
-          n /= p;
-          m_stageRadix.push_back(p);
-          m_stageRemainder.push_back(n);
-          if ( p > 5 )
-              m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic
-        }while(n>1);
-      }
-
-      template <typename _Src>
-        void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
-        {
-          int p = m_stageRadix[stage];
-          int m = m_stageRemainder[stage];
-          Complex * Fout_beg = xout;
-          Complex * Fout_end = xout + p*m;
-
-          if (m>1) {
-            do{
-              // recursive call:
-              // DFT of size m*p performed by doing
-              // p instances of smaller DFTs of size m, 
-              // each one takes a decimated version of the input
-              work(stage+1, xout , xin, fstride*p,in_stride);
-              xin += fstride*in_stride;
-            }while( (xout += m) != Fout_end );
-          }else{
-            do{
-              *xout = *xin;
-              xin += fstride*in_stride;
-            }while(++xout != Fout_end );
-          }
-          xout=Fout_beg;
-
-          // recombine the p smaller DFTs 
-          switch (p) {
-            case 2: bfly2(xout,fstride,m); break;
-            case 3: bfly3(xout,fstride,m); break;
-            case 4: bfly4(xout,fstride,m); break;
-            case 5: bfly5(xout,fstride,m); break;
-            default: bfly_generic(xout,fstride,m,p); break;
-          }
-        }
-
-      inline
-      void bfly2( Complex * Fout, const size_t fstride, int m)
-      {
-        for (int k=0;k<m;++k) {
-          Complex t = Fout[m+k] * m_twiddles[k*fstride];
-          Fout[m+k] = Fout[k] - t;
-          Fout[k] += t;
+  void factorize(int nfft)
+  {
+    //start factoring out 4's, then 2's, then 3,5,7,9,...
+    int n= nfft;
+    int p=4;
+    do {
+      while (n % p) {
+        switch (p) {
+          case 4: p = 2; break;
+          case 2: p = 3; break;
+          default: p += 2; break;
         }
+        if (p*p>n)
+          p=n;// impossible to have a factor > sqrt(n)
       }
+      n /= p;
+      m_stageRadix.push_back(p);
+      m_stageRemainder.push_back(n);
+      if ( p > 5 )
+        m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic
+    }while(n>1);
+  }
 
-      inline
-      void bfly4( Complex * Fout, const size_t fstride, const size_t m)
-      {
-        Complex scratch[6];
-        int negative_if_inverse = m_inverse * -2 +1;
-        for (size_t k=0;k<m;++k) {
-          scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
-          scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
-          scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
-          scratch[5] = Fout[k] - scratch[1];
-
-          Fout[k] += scratch[1];
-          scratch[3] = scratch[0] + scratch[2];
-          scratch[4] = scratch[0] - scratch[2];
-          scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
-
-          Fout[k+2*m]  = Fout[k] - scratch[3];
-          Fout[k] += scratch[3];
-          Fout[k+m] = scratch[5] + scratch[4];
-          Fout[k+3*m] = scratch[5] - scratch[4];
-        }
-      }
-
-      inline
-      void bfly3( Complex * Fout, const size_t fstride, const size_t m)
-      {
-        size_t k=m;
-        const size_t m2 = 2*m;
-        Complex *tw1,*tw2;
-        Complex scratch[5];
-        Complex epi3;
-        epi3 = m_twiddles[fstride*m];
-
-        tw1=tw2=&m_twiddles[0];
+  template <typename _Src>
+    inline
+    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
+    {
+      int p = m_stageRadix[stage];
+      int m = m_stageRemainder[stage];
+      Complex * Fout_beg = xout;
+      Complex * Fout_end = xout + p*m;
 
+      if (m>1) {
         do{
-          scratch[1]=Fout[m] * *tw1;
-          scratch[2]=Fout[m2] * *tw2;
-
-          scratch[3]=scratch[1]+scratch[2];
-          scratch[0]=scratch[1]-scratch[2];
-          tw1 += fstride;
-          tw2 += fstride*2;
-          Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
-          scratch[0] *= epi3.imag();
-          *Fout += scratch[3];
-          Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
-          Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
-          ++Fout;
-        }while(--k);
+          // recursive call:
+          // DFT of size m*p performed by doing
+          // p instances of smaller DFTs of size m, 
+          // each one takes a decimated version of the input
+          work(stage+1, xout , xin, fstride*p,in_stride);
+          xin += fstride*in_stride;
+        }while( (xout += m) != Fout_end );
+      }else{
+        do{
+          *xout = *xin;
+          xin += fstride*in_stride;
+        }while(++xout != Fout_end );
       }
+      xout=Fout_beg;
 
-      inline
-      void bfly5( Complex * Fout, const size_t fstride, const size_t m)
-      {
-        Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-        size_t u;
-        Complex scratch[13];
-        Complex * twiddles = &m_twiddles[0];
-        Complex *tw;
-        Complex ya,yb;
-        ya = twiddles[fstride*m];
-        yb = twiddles[fstride*2*m];
-
-        Fout0=Fout;
-        Fout1=Fout0+m;
-        Fout2=Fout0+2*m;
-        Fout3=Fout0+3*m;
-        Fout4=Fout0+4*m;
-
-        tw=twiddles;
-        for ( u=0; u<m; ++u ) {
-          scratch[0] = *Fout0;
-
-          scratch[1]  = *Fout1 * tw[u*fstride];
-          scratch[2]  = *Fout2 * tw[2*u*fstride];
-          scratch[3]  = *Fout3 * tw[3*u*fstride];
-          scratch[4]  = *Fout4 * tw[4*u*fstride];
-
-          scratch[7] = scratch[1] + scratch[4];
-          scratch[10] = scratch[1] - scratch[4];
-          scratch[8] = scratch[2] + scratch[3];
-          scratch[9] = scratch[2] - scratch[3];
-
-          *Fout0 +=  scratch[7];
-          *Fout0 +=  scratch[8];
-
-          scratch[5] = scratch[0] + Complex(
-              (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
-              (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
-              );
-
-          scratch[6] = Complex(
-              (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
-              -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
-              );
-
-          *Fout1 = scratch[5] - scratch[6];
-          *Fout4 = scratch[5] + scratch[6];
-
-          scratch[11] = scratch[0] +
-            Complex(
-                (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
-                (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
-                );
-
-          scratch[12] = Complex(
-              -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
-              (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
-              );
-
-          *Fout2=scratch[11]+scratch[12];
-          *Fout3=scratch[11]-scratch[12];
-
-          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-        }
+      // recombine the p smaller DFTs 
+      switch (p) {
+        case 2: bfly2(xout,fstride,m); break;
+        case 3: bfly3(xout,fstride,m); break;
+        case 4: bfly4(xout,fstride,m); break;
+        case 5: bfly5(xout,fstride,m); break;
+        default: bfly_generic(xout,fstride,m,p); break;
       }
+    }
 
-      /* perform the butterfly for one stage of a mixed radix FFT */
-      inline
-      void bfly_generic(
-          Complex * Fout,
-          const size_t fstride,
-          int m,
-          int p
-          )
-      {
-        int u,k,q1,q;
-        Complex * twiddles = &m_twiddles[0];
-        Complex t;
-        int Norig = m_twiddles.size();
-        Complex * scratchbuf = &m_scratchBuf[0];
-
-        for ( u=0; u<m; ++u ) {
-          k=u;
-          for ( q1=0 ; q1<p ; ++q1 ) {
-            scratchbuf[q1] = Fout[ k  ];
-            k += m;
-          }
-
-          k=u;
-          for ( q1=0 ; q1<p ; ++q1 ) {
-            int twidx=0;
-            Fout[ k ] = scratchbuf[0];
-            for (q=1;q<p;++q ) {
-              twidx += fstride * k;
-              if (twidx>=Norig) twidx-=Norig;
-              t=scratchbuf[q] * twiddles[twidx];
-              Fout[ k ] += t;
-            }
-            k += m;
-          }
-        }
-      }
-    };
-
-  template <typename _Scalar>
-    struct ei_kissfft_impl
+  inline
+    void bfly2( Complex * Fout, const size_t fstride, int m)
     {
-      typedef _Scalar Scalar;
-      typedef std::complex<Scalar> Complex;
-
-      void clear() 
-      {
-        m_plans.clear();
-        m_realTwiddles.clear();
+      for (int k=0;k<m;++k) {
+        Complex t = Fout[m+k] * m_twiddles[k*fstride];
+        Fout[m+k] = Fout[k] - t;
+        Fout[k] += t;
       }
+    }
 
-      template <typename _Src>
-      inline
-        void fwd( Complex * dst,const _Src *src,int nfft)
-        {
-          get_plan(nfft,false).work(0, dst, src, 1,1);
+  inline
+    void bfly4( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex scratch[6];
+      int negative_if_inverse = m_inverse * -2 +1;
+      for (size_t k=0;k<m;++k) {
+        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+        scratch[5] = Fout[k] - scratch[1];
+
+        Fout[k] += scratch[1];
+        scratch[3] = scratch[0] + scratch[2];
+        scratch[4] = scratch[0] - scratch[2];
+        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+        Fout[k+2*m]  = Fout[k] - scratch[3];
+        Fout[k] += scratch[3];
+        Fout[k+m] = scratch[5] + scratch[4];
+        Fout[k+3*m] = scratch[5] - scratch[4];
+      }
+    }
+
+  inline
+    void bfly3( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      size_t k=m;
+      const size_t m2 = 2*m;
+      Complex *tw1,*tw2;
+      Complex scratch[5];
+      Complex epi3;
+      epi3 = m_twiddles[fstride*m];
+
+      tw1=tw2=&m_twiddles[0];
+
+      do{
+        scratch[1]=Fout[m] * *tw1;
+        scratch[2]=Fout[m2] * *tw2;
+
+        scratch[3]=scratch[1]+scratch[2];
+        scratch[0]=scratch[1]-scratch[2];
+        tw1 += fstride;
+        tw2 += fstride*2;
+        Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
+        scratch[0] *= epi3.imag();
+        *Fout += scratch[3];
+        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+        ++Fout;
+      }while(--k);
+    }
+
+  inline
+    void bfly5( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+      size_t u;
+      Complex scratch[13];
+      Complex * twiddles = &m_twiddles[0];
+      Complex *tw;
+      Complex ya,yb;
+      ya = twiddles[fstride*m];
+      yb = twiddles[fstride*2*m];
+
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      tw=twiddles;
+      for ( u=0; u<m; ++u ) {
+        scratch[0] = *Fout0;
+
+        scratch[1]  = *Fout1 * tw[u*fstride];
+        scratch[2]  = *Fout2 * tw[2*u*fstride];
+        scratch[3]  = *Fout3 * tw[3*u*fstride];
+        scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+        scratch[7] = scratch[1] + scratch[4];
+        scratch[10] = scratch[1] - scratch[4];
+        scratch[8] = scratch[2] + scratch[3];
+        scratch[9] = scratch[2] - scratch[3];
+
+        *Fout0 +=  scratch[7];
+        *Fout0 +=  scratch[8];
+
+        scratch[5] = scratch[0] + Complex(
+            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+            );
+
+        scratch[6] = Complex(
+            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+            );
+
+        *Fout1 = scratch[5] - scratch[6];
+        *Fout4 = scratch[5] + scratch[6];
+
+        scratch[11] = scratch[0] +
+          Complex(
+              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+              );
+
+        scratch[12] = Complex(
+            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+            );
+
+        *Fout2=scratch[11]+scratch[12];
+        *Fout3=scratch[11]-scratch[12];
+
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+    }
+
+  /* perform the butterfly for one stage of a mixed radix FFT */
+  inline
+    void bfly_generic(
+        Complex * Fout,
+        const size_t fstride,
+        int m,
+        int p
+        )
+    {
+      int u,k,q1,q;
+      Complex * twiddles = &m_twiddles[0];
+      Complex t;
+      int Norig = m_twiddles.size();
+      Complex * scratchbuf = &m_scratchBuf[0];
+
+      for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          scratchbuf[q1] = Fout[ k  ];
+          k += m;
         }
 
-      // real-to-complex forward FFT
-      // perform two FFTs of src even and src odd
-      // then twiddle to recombine them into the half-spectrum format
-      // then fill in the conjugate symmetric half
-      inline
-      void fwd( Complex * dst,const Scalar * src,int nfft) 
-      {
-        if ( nfft&3  ) {
-          // use generic mode for odd
-          get_plan(nfft,false).work(0, dst, src, 1,1);
-        }else{
-          int ncfft = nfft>>1;
-          int ncfft2 = nfft>>2;
-          Complex * rtw = real_twiddles(ncfft2);
-
-          // use optimized mode for even real
-          fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
-          Complex dc = dst[0].real() +  dst[0].imag();
-          Complex nyquist = dst[0].real() -  dst[0].imag();
-          int k;
-          for ( k=1;k <= ncfft2 ; ++k ) {
-            Complex fpk = dst[k];
-            Complex fpnk = conj(dst[ncfft-k]);
-            Complex f1k = fpk + fpnk;
-            Complex f2k = fpk - fpnk;
-            Complex tw= f2k * rtw[k-1];
-            dst[k] =  (f1k + tw) * Scalar(.5);
-            dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          int twidx=0;
+          Fout[ k ] = scratchbuf[0];
+          for (q=1;q<p;++q ) {
+            twidx += fstride * k;
+            if (twidx>=Norig) twidx-=Norig;
+            t=scratchbuf[q] * twiddles[twidx];
+            Fout[ k ] += t;
           }
-
-          // place conjugate-symmetric half at the end for completeness
-          // TODO: make this configurable ( opt-out )
-          for ( k=1;k < ncfft ; ++k )
-            dst[nfft-k] = conj(dst[k]);
-          dst[0] = dc;
-          dst[ncfft] = nyquist;
+          k += m;
         }
       }
+    }
+};
 
-      // inverse complex-to-complex
-      inline
-      void inv(Complex * dst,const Complex  *src,int nfft)
-      {
-        get_plan(nfft,true).work(0, dst, src, 1,1);
-        scale(dst, nfft, Scalar(1)/nfft );
-      }
+template <typename _Scalar>
+struct ei_kissfft_impl
+{
+  typedef _Scalar Scalar;
+  typedef std::complex<Scalar> Complex;
 
-      // half-complex to scalar
-      inline
-      void inv( Scalar * dst,const Complex * src,int nfft) 
-      {
-        if (nfft&3) {
-          m_tmpBuf.resize(nfft);
-          inv(&m_tmpBuf[0],src,nfft);
-          for (int k=0;k<nfft;++k)
-            dst[k] = m_tmpBuf[k].real();
-        }else{
-          // optimized version for multiple of 4
-          int ncfft = nfft>>1;
-          int ncfft2 = nfft>>2;
-          Complex * rtw = real_twiddles(ncfft2);
-          m_tmpBuf.resize(ncfft);
-          m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
-          for (int k = 1; k <= ncfft / 2; ++k) {
-            Complex fk = src[k];
-            Complex fnkc = conj(src[ncfft-k]);
-            Complex fek = fk + fnkc;
-            Complex tmp = fk - fnkc;
-            Complex fok = tmp * conj(rtw[k-1]);
-            m_tmpBuf[k] = fek + fok;
-            m_tmpBuf[ncfft-k] = conj(fek - fok);
-          }
-          scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft );
-          get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf[0], 1,1);
+  void clear() 
+  {
+    m_plans.clear();
+    m_realTwiddles.clear();
+  }
+
+  inline
+    void fwd( Complex * dst,const Complex *src,int nfft)
+    {
+      get_plan(nfft,false).work(0, dst, src, 1,1);
+    }
+
+  // real-to-complex forward FFT
+  // perform two FFTs of src even and src odd
+  // then twiddle to recombine them into the half-spectrum format
+  // then fill in the conjugate symmetric half
+  inline
+    void fwd( Complex * dst,const Scalar * src,int nfft) 
+    {
+      if ( nfft&3  ) {
+        // use generic mode for odd
+        m_tmpBuf1.resize(nfft);
+        get_plan(nfft,false).work(0, &m_tmpBuf1[0], src, 1,1);
+        std::copy(m_tmpBuf1.begin(),m_tmpBuf1.begin()+(nfft>>1)+1,dst );
+      }else{
+        int ncfft = nfft>>1;
+        int ncfft2 = nfft>>2;
+        Complex * rtw = real_twiddles(ncfft2);
+
+        // use optimized mode for even real
+        fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
+        Complex dc = dst[0].real() +  dst[0].imag();
+        Complex nyquist = dst[0].real() -  dst[0].imag();
+        int k;
+        for ( k=1;k <= ncfft2 ; ++k ) {
+          Complex fpk = dst[k];
+          Complex fpnk = conj(dst[ncfft-k]);
+          Complex f1k = fpk + fpnk;
+          Complex f2k = fpk - fpnk;
+          Complex tw= f2k * rtw[k-1];
+          dst[k] =  (f1k + tw) * Scalar(.5);
+          dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
         }
+        dst[0] = dc;
+        dst[ncfft] = nyquist;
       }
+    }
 
-      protected:
-      typedef ei_kiss_cpx_fft<Scalar> PlanData;
-      typedef std::map<int,PlanData> PlanMap;
+  // inverse complex-to-complex
+  inline
+    void inv(Complex * dst,const Complex  *src,int nfft)
+    {
+      get_plan(nfft,true).work(0, dst, src, 1,1);
+    }
 
-      PlanMap m_plans;
-      std::map<int, std::vector<Complex> > m_realTwiddles;
-      std::vector<Complex> m_tmpBuf;
-
-      inline
-      int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
-
-      inline
-      PlanData & get_plan(int nfft,bool inverse)
-      {
-        // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
-        PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
-        if ( pd.m_twiddles.size() == 0 ) {
-          pd.make_twiddles(nfft,inverse);
-          pd.factorize(nfft);
+  // half-complex to scalar
+  inline
+    void inv( Scalar * dst,const Complex * src,int nfft) 
+    {
+      if (nfft&3) {
+        m_tmpBuf1.resize(nfft);
+        m_tmpBuf2.resize(nfft);
+        std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() );
+        for (int k=1;k<(nfft>>1)+1;++k)
+          m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]);
+        inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft);
+        for (int k=0;k<nfft;++k)
+          dst[k] = m_tmpBuf2[k].real();
+      }else{
+        // optimized version for multiple of 4
+        int ncfft = nfft>>1;
+        int ncfft2 = nfft>>2;
+        Complex * rtw = real_twiddles(ncfft2);
+        m_tmpBuf1.resize(ncfft);
+        m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
+        for (int k = 1; k <= ncfft / 2; ++k) {
+          Complex fk = src[k];
+          Complex fnkc = conj(src[ncfft-k]);
+          Complex fek = fk + fnkc;
+          Complex tmp = fk - fnkc;
+          Complex fok = tmp * conj(rtw[k-1]);
+          m_tmpBuf1[k] = fek + fok;
+          m_tmpBuf1[ncfft-k] = conj(fek - fok);
         }
-        return pd;
+        get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf1[0], 1,1);
       }
+    }
 
-      inline
-      Complex * real_twiddles(int ncfft2)
-      {
-        std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
-        if ( (int)twidref.size() != ncfft2 ) {
-          twidref.resize(ncfft2);
-          int ncfft= ncfft2<<1;
-          Scalar pi =  acos( Scalar(-1) );
-          for (int k=1;k<=ncfft2;++k) 
-            twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) );
-        }
-        return &twidref[0];
-      }
+  protected:
+  typedef ei_kiss_cpx_fft<Scalar> PlanData;
+  typedef std::map<int,PlanData> PlanMap;
 
-      // TODO move scaling up into Eigen::FFT
-      inline
-      void scale(Complex *dst,int n,Scalar s) 
-      {
-        for (int k=0;k<n;++k)
-          dst[k] *= s;
+  PlanMap m_plans;
+  std::map<int, std::vector<Complex> > m_realTwiddles;
+  std::vector<Complex> m_tmpBuf1;
+  std::vector<Complex> m_tmpBuf2;
+
+  inline
+    int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
+
+  inline
+    PlanData & get_plan(int nfft,bool inverse)
+    {
+      // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
+      PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
+      if ( pd.m_twiddles.size() == 0 ) {
+        pd.make_twiddles(nfft,inverse);
+        pd.factorize(nfft);
       }
-    };
+      return pd;
+    }
+
+  inline
+    Complex * real_twiddles(int ncfft2)
+    {
+      std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
+      if ( (int)twidref.size() != ncfft2 ) {
+        twidref.resize(ncfft2);
+        int ncfft= ncfft2<<1;
+        Scalar pi =  acos( Scalar(-1) );
+        for (int k=1;k<=ncfft2;++k) 
+          twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) );
+      }
+      return &twidref[0];
+    }
+};
+
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
+
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index d182c9abf..c75d5eb0b 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -26,3 +26,4 @@ if(FFTW_FOUND)
   ei_add_test(FFTW  "-DEIGEN_FFTW_DEFAULT " "-lfftw3 -lfftw3f -lfftw3l" )
 endif(FFTW_FOUND)
 
+ei_add_test(Complex)
diff --git a/unsupported/test/Complex.cpp b/unsupported/test/Complex.cpp
new file mode 100644
index 000000000..bedeb9f27
--- /dev/null
+++ b/unsupported/test/Complex.cpp
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+#ifdef EIGEN_TEST_FUNC
+#  include "main.h"
+#else 
+#  include <iostream>
+#  define CALL_SUBTEST(x) x
+#  define VERIFY(x) x
+#  define test_Complex main
+#endif
+
+#include <unsupported/Eigen/Complex>
+#include <vector>
+
+using namespace std;
+using namespace Eigen;
+
+template <typename T>
+void take_std( std::complex<T> * dst, int n )
+{
+    cout << dst[n-1] << endl;
+}
+
+
+template <typename T>
+void syntax()
+{
+    // this works fine
+    Matrix< Complex<T>, 9, 1>  a;
+    std::complex<T> * pa = &a[0];
+    Complex<T> * pa2 = &a[0];
+    take_std( pa,9);
+
+    // this does not work, but I wish it would
+    // take_std(&a[0];)
+    // this does
+    take_std( (std::complex<T> *)&a[0],9);
+
+    // this does not work, but it would be really nice
+    //vector< Complex<T> > a; 
+    // (on my gcc 4.4.1 )
+    // std::vector assumes operator& returns a POD pointer
+
+    // this works fine
+    Complex<T> b[9];
+    std::complex<T> * pb = &b[0]; // this works fine
+
+    take_std( pb,9);
+}
+
+void test_Complex()
+{
+  CALL_SUBTEST( syntax<float>() );
+  CALL_SUBTEST( syntax<double>() );
+  CALL_SUBTEST( syntax<long double>() );
+} 
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index cc68f3718..ad0d426e4 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -101,12 +101,34 @@ void test_scalar_generic(int nfft)
     ComplexVector outbuf;
     for (int k=0;k<nfft;++k)
         inbuf[k]= (T)(rand()/(double)RAND_MAX - .5);
+
+    // make sure it DOESN'T give the right full spectrum answer
+    // if we've asked for half-spectrum
+    fft.SetFlag(fft.HalfSpectrum );
+    fft.fwd( outbuf,inbuf);
+    VERIFY(outbuf.size() == (nfft>>1)+1);
+    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
+
+    fft.ClearFlag(fft.HalfSpectrum );
     fft.fwd( outbuf,inbuf);
     VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
 
     ScalarVector buf3;
     fft.inv( buf3 , outbuf);
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+
+    // verify that the Unscaled flag takes effect
+    ComplexVector buf4;
+    fft.SetFlag(fft.Unscaled);
+    fft.inv( buf4 , outbuf);
+    for (int k=0;k<nfft;++k)
+        buf4[k] *= T(1./nfft);
+    VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>()  );// gross check
+
+    // verify that ClearFlag works
+    fft.ClearFlag(fft.Unscaled);
+    fft.inv( buf3 , outbuf);
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
 template <typename T>
@@ -136,6 +158,19 @@ void test_complex_generic(int nfft)
     fft.inv( buf3 , outbuf);
 
     VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+
+    // verify that the Unscaled flag takes effect
+    ComplexVector buf4;
+    fft.SetFlag(fft.Unscaled);
+    fft.inv( buf4 , outbuf);
+    for (int k=0;k<nfft;++k)
+        buf4[k] *= T(1./nfft);
+    VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>()  );// gross check
+
+    // verify that ClearFlag works
+    fft.ClearFlag(fft.Unscaled);
+    fft.inv( buf3 , outbuf);
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
 template <typename T>