From 92ca9fc032ecaa7f9595f5c5f7d9d8df6c972bbe Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:21:04 -0400 Subject: [PATCH 01/34] initial pass of FFT module -- includes complex 1-d case only --- unsupported/Eigen/FFT.h | 84 +++++ unsupported/Eigen/src/FFT/simple_fft_traits.h | 296 ++++++++++++++++++ unsupported/test/CMakeLists.txt | 1 + unsupported/test/FFT.cpp | 85 +++++ 4 files changed, 466 insertions(+) create mode 100644 unsupported/Eigen/FFT.h create mode 100644 unsupported/Eigen/src/FFT/simple_fft_traits.h create mode 100644 unsupported/test/FFT.cpp diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h new file mode 100644 index 000000000..03490d2c5 --- /dev/null +++ b/unsupported/Eigen/FFT.h @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_FFT_H +#define EIGEN_FFT_H + +// simple_fft_traits: small, free, reasonably efficient default, derived from kissfft +#include "src/FFT/simple_fft_traits.h" +#define DEFAULT_FFT_TRAITS simple_fft_traits + +// FFTW: faster, GPL-not LGPL, bigger code size +#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines +// TODO +// #include "src/FFT/fftw_traits.h" +// #define DEFAULT_FFT_TRAITS fftw_traits +#endif + +// intel Math Kernel Library: fastest, commerical +#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines +// TODO +// #include "src/FFT/imkl_traits.h" +// #define DEFAULT_FFT_TRAITS imkl_traits +#endif + +namespace Eigen { + +template + > +class FFT +{ + public: + typedef _Traits traits_type; + typedef typename traits_type::Scalar Scalar; + typedef typename traits_type::Complex Complex; + + FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } + + void fwd( Complex * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,false,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + void inv( Complex * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,true,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + // TODO: fwd,inv for Scalar + // TODO: multi-dimensional FFTs + // TODO: handle Eigen MatrixBase + + traits_type & traits() {return m_traits;} + private: + traits_type m_traits; +}; +#undef DEFAULT_FFT_TRAITS +} +#endif diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h new file mode 100644 index 000000000..fe9d24b84 --- /dev/null +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -0,0 +1,296 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include + +namespace Eigen { + + template + struct simple_fft_traits + { + typedef _Scalar Scalar; + typedef std::complex Complex; + simple_fft_traits() : m_nfft(0) {} + + void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + { + if (m_nfft == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + void exec(Complex * dst, const Complex * src) + { + work(0, dst, src, 1,1); + } + + void postprocess(Complex *dst) + { + if (m_inverse) { + Scalar scale = 1./m_nfft; + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + + scratch[0] *= epi3.imag(); + + *Fout += scratch[3]; + + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + + int m_nfft; + bool m_inverse; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + }; +} diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index abfbb0185..49de7dfb2 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -19,3 +19,4 @@ ei_add_test(autodiff) ei_add_test(BVH) ei_add_test(matrixExponential) ei_add_test(alignedvector3) +ei_add_test(FFT) diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp new file mode 100644 index 000000000..b13a7810f --- /dev/null +++ b/unsupported/test/FFT.cpp @@ -0,0 +1,85 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" +#include + +//#include +//#include +//#include + +using namespace std; + +template +void test_fft(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + + //cout << "type:" << typeid(T).name() << " nfft:" << nfft; + + FFT fft; + + vector inbuf(nfft); + vector buf3(nfft); + vector outbuf(nfft); + for (int k=0;k acc = 0; + long double phinc = 2*k0* M_PIl / nfft; + for (int k1=0;k1 x(inbuf[k1].real(),inbuf[k1].imag()); + acc += x * exp( complex(0,-k1*phinc) ); + } + totalpower += norm(acc); + complex x(outbuf[k0].real(),outbuf[k0].imag()); + complex dif = acc - x; + difpower += norm(dif); + } + long double rmse = sqrt(difpower/totalpower); + VERIFY( rmse < 1e-5 );// gross check + + totalpower=0; + difpower=0; + for (int k=0;k(32) )); CALL_SUBTEST(( test_fft(32) )); CALL_SUBTEST(( test_fft(32) )); + CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); + CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); +} From be1b2ad4ec1851aa990dc97337fc74e16edd1dd0 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:26:19 -0400 Subject: [PATCH 02/34] removed unused code --- unsupported/test/FFT.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index b13a7810f..8347bb76b 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -25,10 +25,6 @@ #include "main.h" #include -//#include -//#include -//#include - using namespace std; template @@ -36,8 +32,6 @@ void test_fft(int nfft) { typedef typename Eigen::FFT::Complex Complex; - //cout << "type:" << typeid(T).name() << " nfft:" << nfft; - FFT fft; vector inbuf(nfft); From 68cad98bc935e53102a9432560085b81c5766743 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:34:38 -0400 Subject: [PATCH 03/34] indent level change --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index fe9d24b84..6fbbeac2e 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -80,13 +80,15 @@ namespace Eigen { void postprocess(Complex *dst) { - if (m_inverse) { - Scalar scale = 1./m_nfft; - for (int k=0;k Date: Fri, 22 May 2009 22:37:59 -0400 Subject: [PATCH 04/34] added non-optimized real forward fft (no inverse yet) --- unsupported/Eigen/FFT.h | 27 ++++- unsupported/Eigen/src/FFT/simple_fft_traits.h | 10 +- unsupported/test/FFT.cpp | 113 ++++++++++++------ 3 files changed, 106 insertions(+), 44 deletions(-) diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index 03490d2c5..a1f87a609 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -57,21 +57,36 @@ class FFT FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } - void fwd( Complex * dst, const Complex * src, int nfft) + template + void fwd( Complex * dst, const _Input * src, int nfft) { m_traits.prepare(nfft,false,dst,src); m_traits.exec(dst,src); m_traits.postprocess(dst); } - void inv( Complex * dst, const Complex * src, int nfft) + template + void fwd( std::vector & dst, const std::vector<_Input> & src) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + dst.resize( src.size() ); + fwd( &dst[0],&src[0],src.size() ); + } + + template + void inv( _Output * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,true,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + template + void inv( std::vector<_Output> & dst, const std::vector & src) + { + dst.resize( src.size() ); + inv( &dst[0],&src[0],src.size() ); } - // TODO: fwd,inv for Scalar // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 6fbbeac2e..5a910dd1f 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -34,7 +34,8 @@ namespace Eigen { typedef std::complex Complex; simple_fft_traits() : m_nfft(0) {} - void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + template + void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -73,7 +74,8 @@ namespace Eigen { }while(n>1); } - void exec(Complex * dst, const Complex * src) + template + void exec(Complex * dst, const _Src * src) { work(0, dst, src, 1,1); } @@ -89,7 +91,9 @@ namespace Eigen { private: - void work( int stage,Complex * Fout, const Complex * f, size_t fstride,size_t in_stride) + + template + void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 8347bb76b..ef03359e2 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -25,55 +25,98 @@ #include "main.h" #include + using namespace std; +template < typename T> +complex promote(complex x) { return complex(x.real(),x.imag()); } + +complex promote(float x) { return complex( x); } +complex promote(double x) { return complex( x); } +complex promote(long double x) { return complex( x); } + + + template + long double fft_rmse( const vector & fftbuf,const vector & timebuf) + { + long double totalpower=0; + long double difpower=0; + for (size_t k0=0;k0 acc = 0; + long double phinc = -2.*k0* M_PIl / timebuf.size(); + for (size_t k1=0;k1(0,k1*phinc) ); + } + totalpower += norm(acc); + complex x = promote(fftbuf[k0]); + complex dif = acc - x; + difpower += norm(dif); + cerr << k0 << ":" << acc << " " << x << endl; + } + cerr << "rmse:" << sqrt(difpower/totalpower) << endl; + return sqrt(difpower/totalpower); + } + + template + long double dif_rmse( const vector buf1,const vector buf2) + { + long double totalpower=0; + long double difpower=0; + size_t n = min( buf1.size(),buf2.size() ); + for (size_t k=0;k -void test_fft(int nfft) +void test_scalar(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + typedef typename Eigen::FFT::Scalar Scalar; + + FFT fft; + vector inbuf(nfft); + vector outbuf; + for (int k=0;k +void test_complex(int nfft) { typedef typename Eigen::FFT::Complex Complex; FFT fft; vector inbuf(nfft); - vector buf3(nfft); - vector outbuf(nfft); + vector outbuf; + vector buf3; for (int k=0;k acc = 0; - long double phinc = 2*k0* M_PIl / nfft; - for (int k1=0;k1 x(inbuf[k1].real(),inbuf[k1].imag()); - acc += x * exp( complex(0,-k1*phinc) ); - } - totalpower += norm(acc); - complex x(outbuf[k0].real(),outbuf[k0].imag()); - complex dif = acc - x; - difpower += norm(dif); - } - long double rmse = sqrt(difpower/totalpower); - VERIFY( rmse < 1e-5 );// gross check + VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check - totalpower=0; - difpower=0; - for (int k=0;k(32) )); CALL_SUBTEST(( test_fft(32) )); CALL_SUBTEST(( test_fft(32) )); - CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); - CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); } From 9c0fcd0f6213143216710a5b215aa2bb4a857ce5 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 10:09:48 -0400 Subject: [PATCH 05/34] started real optimization, added benchmark for FFT --- bench/benchFFT.cpp | 64 +++++++++ unsupported/Eigen/FFT.h | 8 +- unsupported/Eigen/src/FFT/simple_fft_traits.h | 125 +++++++++++++----- unsupported/test/FFT.cpp | 3 +- 4 files changed, 157 insertions(+), 43 deletions(-) create mode 100644 bench/benchFFT.cpp diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp new file mode 100644 index 000000000..041576b75 --- /dev/null +++ b/bench/benchFFT.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include +#include +#include +#include + +using namespace Eigen; +using namespace std; + +#ifndef NFFT +#define NFFT 1024 +#endif + +#ifndef TYPE +#define TYPE float +#endif + +#ifndef NITS +#define NITS (10000000/NFFT) +#endif + +int main() +{ + vector > inbuf(NFFT); + vector > outbuf(NFFT); + Eigen::FFT fft; + + fft.fwd( outbuf , inbuf); + + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < NITS; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } + double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); + cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index a1f87a609..c466423b7 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -60,9 +60,7 @@ class FFT template void fwd( Complex * dst, const _Input * src, int nfft) { - m_traits.prepare(nfft,false,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.fwd(dst,src,nfft); } template @@ -75,9 +73,7 @@ class FFT template void inv( _Output * dst, const Complex * src, int nfft) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.inv( dst,src,nfft ); } template diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 5a910dd1f..33433ae03 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -35,7 +35,73 @@ namespace Eigen { simple_fft_traits() : m_nfft(0) {} template - void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) + void fwd( Complex * dst,const _Src *src,int nfft) + { + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + } + + void fwd( Complex * dst,const Scalar * src,int nfft) + { + if ( nfft&1 ) { + // use generic mode for odd + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + }else{ + int ncfft = nfft>>1; + // use optimized mode for even real + prepare(nfft,false); + work(0,dst, reinterpret_cast (src),2,1); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + + int k; + for ( k=1;k <= ncfft/2 ; ++k ) { +/** + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + */ + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + + Complex f1k = fpk + fpnk; + Complex f2k = fpnk - fpk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles + Complex tw = f2k * m_twiddles[2*k];; + + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + + dst[0] = dc; + dst[ncfft] = nyquist; + } + } + + void inv(Complex * dst,const Complex *src,int nfft) + { + prepare(nfft,true); + work(0, dst, src, 1,1); + scale(dst); + } + + void prepare(int nfft,bool inverse) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -74,57 +140,49 @@ namespace Eigen { }while(n>1); } - template - void exec(Complex * dst, const _Src * src) - { - work(0, dst, src, 1,1); - } - - void postprocess(Complex *dst) + void scale(Complex *dst) { if (m_inverse) { - Scalar scale = 1./m_nfft; + Scalar s = 1./m_nfft; for (int k=0;k - void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; - Complex * Fout_beg = Fout; - Complex * Fout_end = Fout + p*m; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; - if (m==1) { - do{ - *Fout = *f; - f += fstride*in_stride; - }while(++Fout != Fout_end ); - }else{ + if (m>1) { do{ // recursive call: // DFT of size m*p performed by doing // p instances of smaller DFTs of size m, // each one takes a decimated version of the input - work(stage+1, Fout , f, fstride*p,in_stride); - f += fstride*in_stride; - }while( (Fout += m) != Fout_end ); + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); } - - Fout=Fout_beg; + xout=Fout_beg; // recombine the p smaller DFTs switch (p) { - case 2: bfly2(Fout,fstride,m); break; - case 3: bfly3(Fout,fstride,m); break; - case 4: bfly4(Fout,fstride,m); break; - case 5: bfly5(Fout,fstride,m); break; - default: bfly_generic(Fout,fstride,m,p); break; + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } } @@ -139,7 +197,7 @@ namespace Eigen { void bfly4( Complex * Fout, const size_t fstride, const size_t m) { - Complex scratch[7]; + Complex scratch[6]; int negative_if_inverse = m_inverse * -2 +1; for (size_t k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); ++Fout; }while(--k); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index ef03359e2..13e98ba77 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -115,8 +115,9 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); - +/* CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + */ } From 304798817268706463f3ff645c8c8b2c887c090a Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 12:50:07 -0400 Subject: [PATCH 06/34] scalar forward FFT optimized for even size, converts to cpx for odd --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 156 ++++++++++-------- unsupported/test/FFT.cpp | 10 +- 2 files changed, 97 insertions(+), 69 deletions(-) diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 33433ae03..f7dd2b9cf 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -24,6 +24,7 @@ #include #include +#include namespace Eigen { @@ -39,51 +40,54 @@ namespace Eigen { { prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); } + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { if ( nfft&1 ) { // use generic mode for odd prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); }else{ int ncfft = nfft>>1; // use optimized mode for even real - prepare(nfft,false); - work(0,dst, reinterpret_cast (src),2,1); + fwd( dst, reinterpret_cast (src),ncfft); + make_real_twiddles(nfft); + std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft/2 ; ++k ) { -/** - fpk = st->tmpbuf[k]; - fpnk.r = st->tmpbuf[ncfft-k].r; - fpnk.i = - st->tmpbuf[ncfft-k].i; - - C_ADD( f1k, fpk , fpnk ); - C_SUB( f2k, fpk , fpnk ); - C_MUL( tw , f2k , st->super_twiddles[k-1]); - - freqdata[k].r = HALF_OF(f1k.r + tw.r); - freqdata[k].i = HALF_OF(f1k.i + tw.i); - freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); - freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); - */ +#if 0 + using namespace std; + cerr << "desired:\n"; + for ( k=1;k <= (ncfft>>1) ; ++k ) { + Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; + } + dc=0; + cerr << "twiddles:\n"; + for (k=0;k>1) ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpnk - fpk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles - Complex tw = f2k * m_twiddles[2*k];; - + Complex f2k = fpk - fpnk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + Complex tw= f2k * m_realTwiddles[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } + + // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,55 +102,74 @@ namespace Eigen { { prepare(nfft,true); work(0, dst, src, 1,1); - scale(dst); + scale(dst, Scalar(1)/m_nfft ); } void prepare(int nfft,bool inverse) { - if (m_nfft == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// no more factors - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); + make_twiddles(nfft,inverse); + factorize(nfft); } - void scale(Complex *dst) + void make_real_twiddles(int nfft) + { + int ncfft2 = nfft>>2; + if ( m_realTwiddles.size() != ncfft2) { + m_realTwiddles.resize(ncfft2); + int ncfft= nfft>>1; + for (int k=1;k<=ncfft2;++k) + m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + } + } + + void make_twiddles(int nfft,bool inverse) + { + if ( m_twiddles.size() == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + m_nfft = nfft; + } + + void scale(Complex *dst,Scalar s) { - if (m_inverse) { - Scalar s = 1./m_nfft; for (int k=0;k m_twiddles; + std::vector m_realTwiddles; std::vector m_stageRadix; std::vector m_stageRemainder; }; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 13e98ba77..41c7fed7b 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -41,6 +41,7 @@ complex promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; + cerr <<"idx\ttruth\t\tvalue\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -51,7 +52,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << ":" << acc << " " << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -108,6 +109,7 @@ void test_complex(int nfft) void test_FFT() { +#if 0 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); @@ -115,9 +117,11 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); -/* +#endif + +#if 1 CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); - */ +#endif } From 326ea773908c2d7e46101085af8f72d20b3f8cbc Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 22:50:07 -0400 Subject: [PATCH 07/34] added FFT inverse complex-to-scalar interface (not yet optimized) --- bench/benchFFT.cpp | 73 ++++++++++++++----- unsupported/Eigen/src/FFT/simple_fft_traits.h | 31 ++++---- unsupported/test/FFT.cpp | 21 ++++-- 3 files changed, 81 insertions(+), 44 deletions(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 041576b75..84cc49fe3 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -31,34 +31,67 @@ using namespace Eigen; using namespace std; -#ifndef NFFT -#define NFFT 1024 -#endif + +template +string nameof(); + +template <> string nameof() {return "float";} +template <> string nameof() {return "double";} +template <> string nameof() {return "long double";} #ifndef TYPE #define TYPE float #endif -#ifndef NITS -#define NITS (10000000/NFFT) +#ifndef NFFT +#define NFFT 1024 +#endif +#ifndef NDATA +#define NDATA 1000000 #endif -int main() +using namespace Eigen; + +template +void bench(int nfft) { - vector > inbuf(NFFT); - vector > outbuf(NFFT); - Eigen::FFT fft; + typedef typename NumTraits::Real Scalar; + typedef typename std::complex Complex; + int nits = NDATA/nfft; + vector inbuf(nfft); + vector outbuf(nfft); + FFT< Scalar > fft; - fft.fwd( outbuf , inbuf); + fft.fwd( outbuf , inbuf); - BenchTimer timer; - timer.reset(); - for (int k=0;k<8;++k) { - timer.start(); - for(int i = 0; i < NITS; i++) - fft.fwd( outbuf , inbuf); - timer.stop(); - } - double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); - cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < nits; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } + + cout << nameof() << " "; + double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); + if ( NumTraits::IsComplex ) { + cout << "complex"; + }else{ + cout << "real "; + mflops /= 2; + } + + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} + +int main(int argc,char ** argv) +{ + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + return 0; } diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index f7dd2b9cf..1e2be8f79 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -54,28 +54,14 @@ namespace Eigen { work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; + int ncfft2 = nfft>>2; // use optimized mode for even real fwd( dst, reinterpret_cast (src),ncfft); make_real_twiddles(nfft); - std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; -#if 0 - using namespace std; - cerr << "desired:\n"; - for ( k=1;k <= (ncfft>>1) ; ++k ) { - Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; - } - dc=0; - cerr << "twiddles:\n"; - for (k=0;k>1) ; ++k ) { + for ( k=1;k <= ncfft2 ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; @@ -87,7 +73,6 @@ namespace Eigen { dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } - // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,6 +83,16 @@ namespace Eigen { } } + // half-complex to scalar + void inv( Scalar * dst,const Complex * src,int nfft) + { + // TODO add optimized version for even numbers + std::vector tmp(nfft); + inv(&tmp[0],src,nfft); + for (int k=0;kn) - p=n;// no more factors + p=n;// impossible to have a factor > sqrt(n) } n /= p; m_stageRadix.push_back(p); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 41c7fed7b..75c33277d 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -28,6 +28,10 @@ using namespace std; +float norm(float x) {return x*x;} +double norm(double x) {return x*x;} +long double norm(long double x) {return x*x;} + template < typename T> complex promote(complex x) { return complex(x.real(),x.imag()); } @@ -83,7 +87,11 @@ void test_scalar(int nfft) for (int k=0;k() );// gross check + + vector buf3; + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } template @@ -100,18 +108,18 @@ void test_complex(int nfft) inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) ); fft.fwd( outbuf , inbuf); - VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check + VERIFY( fft_rmse(outbuf,inbuf) < test_precision() );// gross check fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < 1e-5 );// gross check + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } void test_FFT() { -#if 0 +#if 1 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); - CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); @@ -120,8 +128,9 @@ void test_FFT() #endif #if 1 + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); - CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif } From 210092d16c57ec2fd2f8f515151de284e8a737e3 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 20:35:24 -0400 Subject: [PATCH 08/34] changed name from simple_fft_traits to ei_kissfft_impl --- bench/benchFFT.cpp | 2 +- unsupported/Eigen/{FFT.h => FFT} | 18 ++++++------ ...{simple_fft_traits.h => ei_kissfft_impl.h} | 29 +++++++++++++++++-- unsupported/test/FFT.cpp | 3 +- 4 files changed, 37 insertions(+), 15 deletions(-) rename unsupported/Eigen/{FFT.h => FFT} (86%) rename unsupported/Eigen/src/FFT/{simple_fft_traits.h => ei_kissfft_impl.h} (95%) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 84cc49fe3..ffa4ffffc 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include using namespace Eigen; using namespace std; diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT similarity index 86% rename from unsupported/Eigen/FFT.h rename to unsupported/Eigen/FFT index c466423b7..3d852f5a2 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT @@ -25,28 +25,28 @@ #ifndef EIGEN_FFT_H #define EIGEN_FFT_H -// simple_fft_traits: small, free, reasonably efficient default, derived from kissfft -#include "src/FFT/simple_fft_traits.h" -#define DEFAULT_FFT_TRAITS simple_fft_traits +// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft +#include "src/FFT/ei_kissfft_impl.h" +#define DEFAULT_FFT_IMPL ei_kissfft_impl // FFTW: faster, GPL-not LGPL, bigger code size #ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines // TODO -// #include "src/FFT/fftw_traits.h" -// #define DEFAULT_FFT_TRAITS fftw_traits +// #include "src/FFT/ei_fftw_impl.h" +// #define DEFAULT_FFT_IMPL ei_fftw_impl #endif // intel Math Kernel Library: fastest, commerical #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO -// #include "src/FFT/imkl_traits.h" -// #define DEFAULT_FFT_TRAITS imkl_traits +// #include "src/FFT/ei_imkl_impl.h" +// #define DEFAULT_FFT_IMPL ei_imkl_impl #endif namespace Eigen { template + typename _Traits=DEFAULT_FFT_IMPL<_Scalar> > class FFT { @@ -90,6 +90,6 @@ class FFT private: traits_type m_traits; }; -#undef DEFAULT_FFT_TRAITS +#undef DEFAULT_FFT_IMPL } #endif diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h similarity index 95% rename from unsupported/Eigen/src/FFT/simple_fft_traits.h rename to unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 1e2be8f79..ce2c9f16e 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -24,16 +24,15 @@ #include #include -#include namespace Eigen { template - struct simple_fft_traits + struct ei_kissfft_impl { typedef _Scalar Scalar; typedef std::complex Complex; - simple_fft_traits() : m_nfft(0) {} + ei_kissfft_impl() : m_nfft(0) {} template void fwd( Complex * dst,const _Src *src,int nfft) @@ -370,5 +369,29 @@ namespace Eigen { std::vector m_realTwiddles; std::vector m_stageRadix; std::vector m_stageRemainder; +/* + enum {FORWARD,INVERSE,REAL,COMPLEX}; + + struct PlanKey + { + PlanKey(int nfft,bool isinverse,bool iscomplex) + { + _key = (nfft<<2) | (isinverse<<1) | iscomplex; + } + + bool operator<(const PlanKey & other) const + { + return this->_key < other._key; + } + int _key; + }; + + struct PlanData + { + std::vector m_twiddles; + }; + + std::map. #include "main.h" -#include - +#include using namespace std; From 03ed6f9bfb63879d475f5bb8ea46cff96063d010 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:06:49 -0400 Subject: [PATCH 09/34] refactored ei_kissfft_impl to maintain a cache of cpx fft plans --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 612 ++++++++++---------- unsupported/test/FFT.cpp | 7 +- 2 files changed, 320 insertions(+), 299 deletions(-) diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index ce2c9f16e..3580e6c61 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -24,21 +24,279 @@ #include #include +#include namespace Eigen { + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + ei_kiss_cpx_fft() { } + + void make_twiddles(int nfft,bool inverse) + { + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + }; + + template struct ei_kissfft_impl { typedef _Scalar Scalar; typedef std::complex Complex; - ei_kissfft_impl() : m_nfft(0) {} + ei_kissfft_impl() {} + + void clear() + { + m_plans.clear(); + m_realTwiddles.clear(); + } template void fwd( Complex * dst,const _Src *src,int nfft) { - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); } // real-to-complex forward FFT @@ -47,16 +305,16 @@ namespace Eigen { // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { - if ( nfft&1 ) { + if ( nfft&3 ) { // use generic mode for odd - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + // use optimized mode for even real - fwd( dst, reinterpret_cast (src),ncfft); - make_real_twiddles(nfft); + fwd( dst, reinterpret_cast (src), ncfft); Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; @@ -65,8 +323,7 @@ namespace Eigen { Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; Complex f2k = fpk - fpnk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - Complex tw= f2k * m_realTwiddles[k-1]; + Complex tw= f2k * rtw[k-1]; dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); @@ -94,304 +351,67 @@ namespace Eigen { void inv(Complex * dst,const Complex *src,int nfft) { - prepare(nfft,true); - work(0, dst, src, 1,1); - scale(dst, Scalar(1)/m_nfft ); - } - - void prepare(int nfft,bool inverse) - { - make_twiddles(nfft,inverse); - factorize(nfft); - } - - void make_real_twiddles(int nfft) - { - int ncfft2 = nfft>>2; - if ( m_realTwiddles.size() != ncfft2) { - m_realTwiddles.resize(ncfft2); - int ncfft= nfft>>1; - for (int k=1;k<=ncfft2;++k) - m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - } - } - - void make_twiddles(int nfft,bool inverse) - { - if ( m_twiddles.size() == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - m_nfft = nfft; - } - - void scale(Complex *dst,Scalar s) - { - for (int k=0;k - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + typedef ei_kiss_cpx_fft PlanData; + + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; + + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + + PlanData & get_plan(int nfft,bool inverse) { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u::iterator MapIt; + MapIt it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.invert(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } - - k=u; - for ( q1=0 ; q1

=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; + return it->second; + */ + PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; + if ( pd.m_twiddles.size() == 0 ) { + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } - } + return pd; } - int m_nfft; - bool m_inverse; - std::vector m_twiddles; - std::vector m_realTwiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; -/* - enum {FORWARD,INVERSE,REAL,COMPLEX}; - - struct PlanKey + Complex * real_twiddles(int ncfft2) { - PlanKey(int nfft,bool isinverse,bool iscomplex) - { - _key = (nfft<<2) | (isinverse<<1) | iscomplex; + std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there + if ( (int)twidref.size() != ncfft2 ) { + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } + return &twidref[0]; + } - bool operator<(const PlanKey & other) const - { - return this->_key < other._key; - } - int _key; - }; - - struct PlanData + void scale(Complex *dst,int n,Scalar s) { - std::vector m_twiddles; - }; - - std::map promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; - cerr <<"idx\ttruth\t\tvalue\n"; + cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -55,7 +55,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << "\t" << acc << "\t" << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -127,8 +127,9 @@ void test_FFT() #endif #if 1 - CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif From 09b47332553a79dab30516e6b1d410dea90cf9b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:52:21 -0400 Subject: [PATCH 10/34] added real-optimized inverse FFT (NFFT must be multiple of 4) --- bench/benchFFT.cpp | 30 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 688 ++++++++++---------- 2 files changed, 370 insertions(+), 348 deletions(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index ffa4ffffc..14f5063fb 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -53,7 +53,7 @@ template <> string nameof() {return "long double";} using namespace Eigen; template -void bench(int nfft) +void bench(int nfft,bool fwd) { typedef typename NumTraits::Real Scalar; typedef typename std::complex Complex; @@ -69,7 +69,10 @@ void bench(int nfft) for (int k=0;k<8;++k) { timer.start(); for(int i = 0; i < nits; i++) - fft.fwd( outbuf , inbuf); + if (fwd) + fft.fwd( outbuf , inbuf); + else + fft.inv(inbuf,outbuf); timer.stop(); } @@ -82,16 +85,27 @@ void bench(int nfft) mflops /= 2; } + if (fwd) + cout << " fwd"; + else + cout << " inv"; + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; } int main(int argc,char ** argv) { - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); return 0; } diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 3580e6c61..453c7f6da 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -28,390 +28,398 @@ namespace Eigen { - template - struct ei_kiss_cpx_fft + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + void make_twiddles(int nfft,bool inverse) { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; } - - void factorize(int nfft) - { - if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft) - { - m_stageRadix.resize(0); - m_stageRemainder.resize(0); - //factorize - //start factoring out 4's, then 2's, then 3,5,7,9,... - int n= nfft; - int p=4; - do { - while (n % p) { - switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; - } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - } - - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; - } - } - } - }; - + } + } + }; template struct ei_kissfft_impl { - typedef _Scalar Scalar; - typedef std::complex Complex; - ei_kissfft_impl() {} + typedef _Scalar Scalar; + typedef std::complex Complex; - void clear() - { + void clear() + { m_plans.clear(); m_realTwiddles.clear(); - } + } - template - void fwd( Complex * dst,const _Src *src,int nfft) - { + template + void fwd( Complex * dst,const _Src *src,int nfft) + { get_plan(nfft,false).work(0, dst, src, 1,1); - } + } - // real-to-complex forward FFT - // perform two FFTs of src even and src odd - // then twiddle to recombine them into the half-spectrum format - // then fill in the conjugate symmetric half - void fwd( Complex * dst,const Scalar * src,int nfft) - { + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half + void fwd( Complex * dst,const Scalar * src,int nfft) + { if ( nfft&3 ) { - // use generic mode for odd - get_plan(nfft,false).work(0, dst, src, 1,1); + // use generic mode for odd + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); + int ncfft = nfft>>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); - // use optimized mode for even real - fwd( dst, reinterpret_cast (src), ncfft); - Complex dc = dst[0].real() + dst[0].imag(); - Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft2 ; ++k ) { - Complex fpk = dst[k]; - Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpk - fpnk; - Complex tw= f2k * rtw[k-1]; + // use optimized mode for even real + fwd( dst, reinterpret_cast (src), ncfft); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + int k; + for ( k=1;k <= ncfft2 ; ++k ) { + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + Complex f1k = fpk + fpnk; + Complex f2k = fpk - fpnk; + Complex tw= f2k * rtw[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } - dst[k] = (f1k + tw) * Scalar(.5); - dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); - } - - // place conjugate-symmetric half at the end for completeness - // TODO: make this configurable ( opt-out ) - for ( k=1;k < ncfft ; ++k ) - dst[nfft-k] = conj(dst[k]); - - dst[0] = dc; - dst[ncfft] = nyquist; + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + dst[0] = dc; + dst[ncfft] = nyquist; } - } + } - // half-complex to scalar - void inv( Scalar * dst,const Complex * src,int nfft) - { - // TODO add optimized version for even numbers - std::vector tmp(nfft); - inv(&tmp[0],src,nfft); - for (int k=0;k>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + m_scratchBuf.resize(ncfft); + m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + for (int k = 1; k <= ncfft / 2; ++k) { + Complex fk = src[k]; + Complex fnkc = conj(src[ncfft-k]); + Complex fek = fk + fnkc; + Complex tmp = fk - fnkc; + Complex fok = tmp * conj(rtw[k-1]); + m_scratchBuf[k] = fek + fok; + m_scratchBuf[ncfft-k] = conj(fek - fok); + } + scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + } + } - typedef ei_kiss_cpx_fft PlanData; + private: - typedef std::map PlanMap; - PlanMap m_plans; - std::map > m_realTwiddles; + typedef ei_kiss_cpx_fft PlanData; - int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; + std::vector m_scratchBuf; - PlanData & get_plan(int nfft,bool inverse) - { - /* + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + + PlanData & get_plan(int nfft,bool inverse) + { + /* TODO: figure out why this does not work (g++ 4.3.2) * for some reason this does not work * - typedef typename std::map::iterator MapIt; - MapIt it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.invert(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } + PlanMap::iterator it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.conjugate(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } return it->second; */ PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { - pd.make_twiddles(nfft,inverse); - pd.factorize(nfft); + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } return pd; - } + } - Complex * real_twiddles(int ncfft2) - { + Complex * real_twiddles(int ncfft2) + { std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there if ( (int)twidref.size() != ncfft2 ) { - twidref.resize(ncfft2); - int ncfft= ncfft2<<1; - Scalar pi = acos( Scalar(-1) ); - for (int k=1;k<=ncfft2;++k) - twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } return &twidref[0]; - } + } - void scale(Complex *dst,int n,Scalar s) - { + void scale(Complex *dst,int n,Scalar s) + { for (int k=0;k Date: Wed, 27 May 2009 21:32:42 -0400 Subject: [PATCH 11/34] various comment changes --- unsupported/Eigen/FFT | 6 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 472 ++++++++++---------- 2 files changed, 231 insertions(+), 247 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 3d852f5a2..31d8c74c5 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -29,14 +29,14 @@ #include "src/FFT/ei_kissfft_impl.h" #define DEFAULT_FFT_IMPL ei_kissfft_impl -// FFTW: faster, GPL-not LGPL, bigger code size +// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size #ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines // TODO // #include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif -// intel Math Kernel Library: fastest, commerical +// intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO // #include "src/FFT/ei_imkl_impl.h" diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 453c7f6da..91fa5ca18 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -28,252 +28,255 @@ namespace Eigen { + // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft + // Copyright 2003-2009 Mark Borgerding + template - struct ei_kiss_cpx_fft - { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; - - void make_twiddles(int nfft,bool inverse) + struct ei_kiss_cpx_fft { - m_inverse = inverse; - m_twiddles.resize(nfft); - Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; - for (int i=0;i Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; - void conjugate() - { - m_inverse = !m_inverse; - for ( size_t i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;k1) { do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); + scratch[1]=Fout[m] * *tw1; + scratch[2]=Fout[m2] * *tw2; + + scratch[3]=scratch[1]+scratch[2]; + scratch[0]=scratch[1]-scratch[2]; + tw1 += fstride; + tw2 += fstride*2; + Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); } - xout=Fout_beg; - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; + /* perform the butterfly for one stage of a mixed radix FFT */ + void bfly_generic( + Complex * Fout, + const size_t fstride, + int m, + int p + ) + { + int u,k,q1,q; + Complex * twiddles = &m_twiddles[0]; + Complex t; + int Norig = m_twiddles.size(); + Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; } - k += m; } } - } - }; + }; template - struct ei_kissfft_impl - { + struct ei_kissfft_impl + { typedef _Scalar Scalar; typedef std::complex Complex; @@ -284,10 +287,10 @@ namespace Eigen { } template - void fwd( Complex * dst,const _Src *src,int nfft) - { - get_plan(nfft,false).work(0, dst, src, 1,1); - } + void fwd( Complex * dst,const _Src *src,int nfft) + { + get_plan(nfft,false).work(0, dst, src, 1,1); + } // real-to-complex forward FFT // perform two FFTs of src even and src odd @@ -363,11 +366,10 @@ namespace Eigen { } } - private: - + private: typedef ei_kiss_cpx_fft PlanData; - typedef std::map PlanMap; + PlanMap m_plans; std::map > m_realTwiddles; std::vector m_scratchBuf; @@ -376,25 +378,7 @@ namespace Eigen { PlanData & get_plan(int nfft,bool inverse) { - /* TODO: figure out why this does not work (g++ 4.3.2) - * for some reason this does not work - * - PlanMap::iterator it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.conjugate(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } - } - return it->second; - */ + // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { pd.make_twiddles(nfft,inverse); @@ -421,5 +405,5 @@ namespace Eigen { for (int k=0;k Date: Sat, 30 May 2009 17:55:47 -0400 Subject: [PATCH 12/34] added ei_fftw_impl --- unsupported/Eigen/FFT | 5 +- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 198 ++++++++++++++++++++ unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 25 +-- 3 files changed, 214 insertions(+), 14 deletions(-) create mode 100644 unsupported/Eigen/src/FFT/ei_fftw_impl.h diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 31d8c74c5..03f8504a4 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -30,9 +30,8 @@ #define DEFAULT_FFT_IMPL ei_kissfft_impl // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines -// TODO -// #include "src/FFT/ei_fftw_impl.h" +#ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines +#include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h new file mode 100644 index 000000000..d592bbb20 --- /dev/null +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -0,0 +1,198 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +namespace Eigen { + // FFTW uses non-const arguments + // so we must use ugly const_cast calls for all the args it uses + // + // This should be safe as long as + // 1. we use FFTW_ESTIMATE for all our planning + // see the FFTW docs section 4.3.2 "Planner Flags" + // 2. fftw_complex is compatible with std::complex + // This assumes std::complex layout is array of size 2 with real,imag + template + T * ei_fftw_cast(const T* p) + { + return const_cast( p); + } + + fftw_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwf_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwl_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + template + struct ei_fftw_plan {}; + + template <> + struct ei_fftw_plan + { + typedef float scalar_type; + typedef fftwf_complex complex_type; + fftwf_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef double scalar_type; + typedef fftw_complex complex_type; + fftw_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef long double scalar_type; + typedef fftwl_complex complex_type; + fftwl_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_c2r( m_plan, src,dst); + } + }; + + template + struct ei_fftw_impl + { + typedef _Scalar Scalar; + typedef std::complex Complex; + + void clear() + { + m_plans.clear(); + } + + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + } + + // real-to-complex forward FFT + void fwd( Complex * dst,const Scalar * src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); + int nhbins=(nfft>>1)+1; + for (int k=nhbins;k < nfft; ++k ) + dst[k] = conj(dst[nfft-k]); + } + + // inverse complex-to-complex + void inv(Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + // scaling + Scalar s = 1./nfft; + for (int k=0;k PlanData; + typedef std::map PlanMap; + + PlanMap m_plans; + + PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) + { + bool inplace = (dst==src); + bool aligned = ( (reinterpret_cast(src)&15) | (reinterpret_cast(dst)&15) ) == 0; + int key = (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned; + return m_plans[key]; + } + }; +} diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 91fa5ca18..a84ac68a0 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -39,6 +39,7 @@ namespace Eigen { std::vector m_twiddles; std::vector m_stageRadix; std::vector m_stageRemainder; + std::vector m_scratchBuf; bool m_inverse; void make_twiddles(int nfft,bool inverse) @@ -75,6 +76,8 @@ namespace Eigen { n /= p; m_stageRadix.push_back(p); m_stageRemainder.push_back(n); + if ( p > 5 ) + m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic }while(n>1); } @@ -249,7 +252,7 @@ namespace Eigen { Complex * twiddles = &m_twiddles[0]; Complex t; int Norig = m_twiddles.size(); - Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + Complex * scratchbuf = &m_scratchBuf[0]; for ( u=0; u>1; int ncfft2 = nfft>>2; Complex * rtw = real_twiddles(ncfft2); - m_scratchBuf.resize(ncfft); - m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + m_tmpBuf.resize(ncfft); + m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); for (int k = 1; k <= ncfft / 2; ++k) { Complex fk = src[k]; Complex fnkc = conj(src[ncfft-k]); Complex fek = fk + fnkc; Complex tmp = fk - fnkc; Complex fok = tmp * conj(rtw[k-1]); - m_scratchBuf[k] = fek + fok; - m_scratchBuf[ncfft-k] = conj(fek - fok); + m_tmpBuf[k] = fek + fok; + m_tmpBuf[ncfft-k] = conj(fek - fok); } - scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); - get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf[0], 1,1); } } @@ -372,7 +375,7 @@ namespace Eigen { PlanMap m_plans; std::map > m_realTwiddles; - std::vector m_scratchBuf; + std::vector m_tmpBuf; int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } From 1c543401741f1328b65d30a764f6273ddf9b60b6 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sun, 31 May 2009 15:44:57 -0400 Subject: [PATCH 13/34] more work on ei_fftw_impl --- bench/benchFFT.cpp | 4 ++++ unsupported/Eigen/FFT | 3 ++- unsupported/test/FFT.cpp | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 14f5063fb..4b6cabb55 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,6 +26,10 @@ #include #include #include +#ifdef USE_FFTW +#include +#endif + #include using namespace Eigen; diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 03f8504a4..dc7e85908 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -32,7 +32,8 @@ // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size #ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines #include "src/FFT/ei_fftw_impl.h" -// #define DEFAULT_FFT_IMPL ei_fftw_impl +#undef DEFAULT_FFT_IMPL +#define DEFAULT_FFT_IMPL ei_fftw_impl #endif // intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 32d1393d0..28230e1c4 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -23,6 +23,11 @@ // Eigen. If not, see . #include "main.h" +//#define USE_FFTW +#ifdef USE_FFTW +#include +#endif + #include using namespace std; From 4d6b962ba47ca2a6445bebd144b8122f3b8e96b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 10 Jun 2009 22:16:32 -0400 Subject: [PATCH 14/34] added FindFFTW, but I don't think it's right yet --- cmake/FindFFTW.cmake | 24 ++++++++++++++++++++++++ unsupported/test/CMakeLists.txt | 6 ++++++ unsupported/test/FFT.cpp | 12 +++--------- 3 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 cmake/FindFFTW.cmake diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake new file mode 100644 index 000000000..a56450b17 --- /dev/null +++ b/cmake/FindFFTW.cmake @@ -0,0 +1,24 @@ + +if (FFTW_INCLUDES AND FFTW_LIBRARIES) + set(FFTW_FIND_QUIETLY TRUE) +endif (FFTW_INCLUDES AND FFTW_LIBRARIES) + +find_path(FFTW_INCLUDES + NAMES + fftw3.h + PATHS + $ENV{FFTWDIR} + ${INCLUDE_INSTALL_DIR} +) + +find_library(FFTWF_LIB NAMES fftw3f PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +find_library(FFTW_LIB NAMES fftw3 PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +find_library(FFTWL_LIB NAMES fftw3l PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR}) +set(FFTW_LIBRARIES "${FFTWF_LIB} ${FFTW_LIB} ${FFTWL_LIB}" ) +message(STATUS "FFTW ${FFTW_LIBRARIES}" ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFTW DEFAULT_MSG + FFTW_INCLUDES FFTW_LIBRARIES) + +mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 49de7dfb2..23217811f 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -20,3 +20,9 @@ ei_add_test(BVH) ei_add_test(matrixExponential) ei_add_test(alignedvector3) ei_add_test(FFT) + +find_package(FFTW) +if(FFTW_FOUND) + ei_add_test(FFTW " " "${FFTW_LIBRARIES}" ) +endif(FFTW_FOUND) + diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 28230e1c4..f0b9b68bf 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -23,11 +23,6 @@ // Eigen. If not, see . #include "main.h" -//#define USE_FFTW -#ifdef USE_FFTW -#include -#endif - #include using namespace std; @@ -121,7 +116,7 @@ void test_complex(int nfft) void test_FFT() { -#if 1 + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); @@ -129,13 +124,12 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); -#endif -#if 1 + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); -#endif } From 218711e18b1adc518a2f861fe53d5ce9da0e98b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 10 Jun 2009 23:25:27 -0400 Subject: [PATCH 15/34] example file --- unsupported/doc/examples/FFT.cpp | 117 +++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 unsupported/doc/examples/FFT.cpp diff --git a/unsupported/doc/examples/FFT.cpp b/unsupported/doc/examples/FFT.cpp new file mode 100644 index 000000000..55e29585a --- /dev/null +++ b/unsupported/doc/examples/FFT.cpp @@ -0,0 +1,117 @@ +// To use the simple FFT implementation +// g++ -o demofft -I.. -Wall -O3 FFT.cpp + +// To use the FFTW implementation +// g++ -o demofft -I.. -DUSE_FFTW -Wall -O3 FFT.cpp -lfftw3 -lfftw3f -lfftw3l + +#ifdef USE_FFTW +#include +#endif + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace Eigen; + +template +T mag2(T a) +{ + return a*a; +} +template +T mag2(std::complex a) +{ + return norm(a); +} + +template +T mag2(const std::vector & vec) +{ + T out=0; + for (size_t k=0;k +T mag2(const std::vector > & vec) +{ + T out=0; + for (size_t k=0;k +vector operator-(const vector & a,const vector & b ) +{ + vector c(a); + for (size_t k=0;k +void RandomFill(std::vector & vec) +{ + for (size_t k=0;k +void RandomFill(std::vector > & vec) +{ + for (size_t k=0;k ( T( rand() )/T(RAND_MAX) - .5, T( rand() )/T(RAND_MAX) - .5); +} + +template +void fwd_inv(size_t nfft) +{ + typedef typename NumTraits::Real Scalar; + vector timebuf(nfft); + RandomFill(timebuf); + + vector freqbuf; + static FFT fft; + fft.fwd(freqbuf,timebuf); + + vector timebuf2; + fft.inv(timebuf2,freqbuf); + + long double rmse = mag2(timebuf - timebuf2) / mag2(timebuf); + cout << "roundtrip rmse: " << rmse << endl; +} + +template +void two_demos(int nfft) +{ + cout << " scalar "; + fwd_inv >(nfft); + cout << " complex "; + fwd_inv,std::complex >(nfft); +} + +void demo_all_types(int nfft) +{ + cout << "nfft=" << nfft << endl; + cout << " float" << endl; + two_demos(nfft); + cout << " double" << endl; + two_demos(nfft); + cout << " long double" << endl; + two_demos(nfft); +} + +int main() +{ + demo_all_types( 2*3*4*5*7 ); + demo_all_types( 2*9*16*25 ); + demo_all_types( 1024 ); + return 0; +} From e577c70e49dddd88c83175921431d2725128b0f3 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 16 Jun 2009 23:54:58 -0400 Subject: [PATCH 16/34] candidate header for Eigen/Complex --- unsupported/Eigen/Complex | 142 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 unsupported/Eigen/Complex diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex new file mode 100644 index 000000000..f8d9af25c --- /dev/null +++ b/unsupported/Eigen/Complex @@ -0,0 +1,142 @@ +#ifndef EIGEN_COMPLEX_H +#define EIGEN_COMPLEX_H + +#include + +namespace Eigen { + +template +struct castable_pointer +{ + castable_pointer(_NativePtr ptr) : _ptr(ptr) {} + operator _NativePtr () {return _ptr;} + operator _PunnedPtr () {return reinterpret_cast<_PunnedPtr>(_ptr);} + private: + _NativePtr _ptr; +}; + +template +struct Complex +{ + typedef typename std::complex StandardComplex; + typedef T value_type; + + Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { } + Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {} + + template + Complex(const Complex&other): _re(other.real()) ,_im(other.imag()) {} + template + Complex(const std::complex&other): _re(other.real()) ,_im(other.imag()) {} + + typedef castable_pointer< Complex*, StandardComplex* > pointer_type; + typedef castable_pointer< const Complex*, const StandardComplex* > const_pointer_type; + + pointer_type operator & () {return pointer_type(this);} + const_pointer_type operator & () const {return const_pointer_type(this);} + + StandardComplex std_type() const {return StandardComplex(real(),imag());} + StandardComplex & std_type() {return *(StandardComplex*)(&(*this));} + + operator StandardComplex () const {return std_type();} + operator StandardComplex & () {return std_type();} + + T & real() {return _re;} + T & imag() {return _im;} + const T & real() const {return _re;} + const T & imag() const {return _im;} + + // *** complex member functions: *** + Complex& operator= (const T& val) { _re=val;_im=0;return *this; } + Complex& operator+= (const T& val) {_re+=val;return *this;} + Complex& operator-= (const T& val) {_re-=val;return *this;} + Complex& operator*= (const T& val) {_re*=val;_im*=val;return *this; } + Complex& operator/= (const T& val) {_re/=val;_im/=val;return *this; } + + Complex& operator= (const Complex& rhs) {_re=rhs._re;_im=rhs._im;return *this;} + Complex& operator= (const StandardComplex& rhs) {_re=rhs.real();_im=rhs.imag();return *this;} + + template Complex& operator= (const Complex& rhs) { _re=rhs._re;_im=rhs._im;return *this;} + template Complex& operator+= (const Complex& rhs) { _re+=rhs._re;_im+=rhs._im;return *this;} + template Complex& operator-= (const Complex& rhs) { _re-=rhs._re;_im-=rhs._im;return *this;} + template Complex& operator*= (const Complex& rhs) { this->std_type() *= rhs.std_type(); return *this; } + template Complex& operator/= (const Complex& rhs) { this->std_type() /= rhs.std_type(); return *this; } + + private: + T _re; + T _im; +}; + +template +T ei_to_std( const T & x) {return x;} + +template +std::complex ei_to_std( const Complex & x) {return x.std_type();} + +// 26.2.6 operators +template Complex operator+(const Complex& rhs) {return rhs;} +template Complex operator-(const Complex& rhs) {return -ei_to_std(rhs);} + +template Complex operator+(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) + ei_to_std(rhs);} +template Complex operator-(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) - ei_to_std(rhs);} +template Complex operator*(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) * ei_to_std(rhs);} +template Complex operator/(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) / ei_to_std(rhs);} +template bool operator==(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) == ei_to_std(rhs);} +template bool operator!=(const Complex& lhs, const Complex& rhs) { return ei_to_std(lhs) != ei_to_std(rhs);} + +template Complex operator+(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); } +template Complex operator-(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); } +template Complex operator*(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); } +template Complex operator/(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); } +template bool operator==(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); } +template bool operator!=(const Complex& lhs, const T& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); } + +template Complex operator+(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); } +template Complex operator-(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); } +template Complex operator*(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); } +template Complex operator/(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); } +template bool operator==(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); } +template bool operator!=(const T& lhs, const Complex& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); } + +template +std::basic_istream& + operator>> (std::basic_istream& istr, Complex& rhs) +{ + return istr >> rhs.std_type(); +} + +template +std::basic_ostream& +operator<< (std::basic_ostream& ostr, const Complex& rhs) +{ + return ostr << rhs.std_type(); +} + + // 26.2.7 values: + template T real(const Complex&x) {return real(ei_to_std(x));} + template T abs(const Complex&x) {return abs(ei_to_std(x));} + template T arg(const Complex&x) {return arg(ei_to_std(x));} + template T norm(const Complex&x) {return norm(ei_to_std(x));} + + template Complex conj(const Complex&x) { return conj(ei_to_std(x));} + template Complex polar(const T& x, const T&y) {return polar(ei_to_std(x),ei_to_std(y));} + // 26.2.8 transcendentals: + template Complex cos (const Complex&x){return cos(ei_to_std(x));} + template Complex cosh (const Complex&x){return cosh(ei_to_std(x));} + template Complex exp (const Complex&x){return exp(ei_to_std(x));} + template Complex log (const Complex&x){return log(ei_to_std(x));} + template Complex log10 (const Complex&x){return log10(ei_to_std(x));} + + template Complex pow(const Complex&x, int p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const Complex&x, const T&p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const Complex&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} + template Complex pow(const T&x, const Complex&p) {return pow(ei_to_std(x),ei_to_std(p));} + + template Complex sin (const Complex&x){return sin(ei_to_std(x));} + template Complex sinh (const Complex&x){return sinh(ei_to_std(x));} + template Complex sqrt (const Complex&x){return sqrt(ei_to_std(x));} + template Complex tan (const Complex&x){return tan(ei_to_std(x));} + template Complex tanh (const Complex&x){return tanh(ei_to_std(x));} +} + +#endif From fb9a15e451371f4e600dbb60b8e5c9d098889844 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 17 Jun 2009 00:09:18 -0400 Subject: [PATCH 17/34] added copyright notice --- unsupported/Eigen/Complex | 50 ++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex index f8d9af25c..a0483abdf 100644 --- a/unsupported/Eigen/Complex +++ b/unsupported/Eigen/Complex @@ -1,6 +1,34 @@ #ifndef EIGEN_COMPLEX_H #define EIGEN_COMPLEX_H +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +// Eigen::Complex reuses as much as possible from std::complex +// and allows easy conversion to and from, even at the pointer level. + + #include namespace Eigen { @@ -21,6 +49,7 @@ struct Complex typedef typename std::complex StandardComplex; typedef T value_type; + // constructors Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { } Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {} @@ -29,22 +58,31 @@ struct Complex template Complex(const std::complex&other): _re(other.real()) ,_im(other.imag()) {} + + // allow binary access to the object as a std::complex typedef castable_pointer< Complex*, StandardComplex* > pointer_type; typedef castable_pointer< const Complex*, const StandardComplex* > const_pointer_type; - pointer_type operator & () {return pointer_type(this);} const_pointer_type operator & () const {return const_pointer_type(this);} - StandardComplex std_type() const {return StandardComplex(real(),imag());} - StandardComplex & std_type() {return *(StandardComplex*)(&(*this));} - operator StandardComplex () const {return std_type();} operator StandardComplex & () {return std_type();} - T & real() {return _re;} - T & imag() {return _im;} + StandardComplex std_type() const {return StandardComplex(real(),imag());} + StandardComplex & std_type() {return *reinterpret_cast(this);} + + + // every sort of accessor and mutator that has ever been in fashion. + // For a brief history, search for "std::complex over-encapsulated" + // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#387 const T & real() const {return _re;} const T & imag() const {return _im;} + T & real() {return _re;} + T & imag() {return _im;} + T & real(const T & x) {return _re=x;} + T & imag(const T & x) {return _im=x;} + void set_real(const T & x) {_re = x;} + void set_imag(const T & x) {_im = x;} // *** complex member functions: *** Complex& operator= (const T& val) { _re=val;_im=0;return *this; } From a39de276a95234ff85f58fa93364b047f3d72c6b Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 14 Sep 2009 01:52:26 -0400 Subject: [PATCH 18/34] added the test case for FFTW --- unsupported/test/CMakeLists.txt | 2 +- unsupported/test/FFTW.cpp | 136 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 unsupported/test/FFTW.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 23217811f..f42077bdc 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -23,6 +23,6 @@ ei_add_test(FFT) find_package(FFTW) if(FFTW_FOUND) - ei_add_test(FFTW " " "${FFTW_LIBRARIES}" ) + ei_add_test(FFTW " " "-lfftw3 -lfftw3f -lfftw3l" ) endif(FFTW_FOUND) diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp new file mode 100644 index 000000000..cf7be75aa --- /dev/null +++ b/unsupported/test/FFTW.cpp @@ -0,0 +1,136 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" +#include +#include + +using namespace std; + +float norm(float x) {return x*x;} +double norm(double x) {return x*x;} +long double norm(long double x) {return x*x;} + +template < typename T> +complex promote(complex x) { return complex(x.real(),x.imag()); } + +complex promote(float x) { return complex( x); } +complex promote(double x) { return complex( x); } +complex promote(long double x) { return complex( x); } + + + template + long double fft_rmse( const vector & fftbuf,const vector & timebuf) + { + long double totalpower=0; + long double difpower=0; + cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; + for (size_t k0=0;k0 acc = 0; + long double phinc = -2.*k0* M_PIl / timebuf.size(); + for (size_t k1=0;k1(0,k1*phinc) ); + } + totalpower += norm(acc); + complex x = promote(fftbuf[k0]); + complex dif = acc - x; + difpower += norm(dif); + cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl; + } + cerr << "rmse:" << sqrt(difpower/totalpower) << endl; + return sqrt(difpower/totalpower); + } + + template + long double dif_rmse( const vector buf1,const vector buf2) + { + long double totalpower=0; + long double difpower=0; + size_t n = min( buf1.size(),buf2.size() ); + for (size_t k=0;k +void test_scalar(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + typedef typename Eigen::FFT::Scalar Scalar; + + FFT fft; + vector inbuf(nfft); + vector outbuf; + for (int k=0;k() );// gross check + + vector buf3; + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check +} + +template +void test_complex(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + + FFT fft; + + vector inbuf(nfft); + vector outbuf; + vector buf3; + for (int k=0;k() );// gross check + + fft.inv( buf3 , outbuf); + + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check +} + +void test_FFTW() +{ + + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); + CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + + + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); + CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); +} From 44ba4b1d6d5cd39d824bb83876175d0dc39a9cc3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 16 Oct 2009 11:27:04 +0200 Subject: [PATCH 19/34] add operator+ scalar to AutoDiffScalar --- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 888aa5c8c..fc5e237ab 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -108,6 +108,22 @@ class AutoDiffScalar inline const DerType& derivatives() const { return m_derivatives; } inline DerType& derivatives() { return m_derivatives; } + inline const AutoDiffScalar operator+(const Scalar& other) const + { + return AutoDiffScalar(m_value + other, m_derivatives); + } + + friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) + { + return AutoDiffScalar(a + b.value(), b.derivatives()); + } + + inline AutoDiffScalar& operator+=(const Scalar& other) + { + value() += other; + return *this; + } + template inline const AutoDiffScalar,DerType,OtherDerType> > operator+(const AutoDiffScalar& other) const From 7b0c4102facc9b5f6ca99ef76febb05a9499b8b0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 16 Oct 2009 13:22:38 +0200 Subject: [PATCH 20/34] * add a Make* expression type builder to allow the construction of generic expressions working for both dense and sparse matrix. A nicer solution would be to use CwiseBinaryOp for any kind of matrix. To this end we either need to change the overall design so that the base class(es) depends on the kind of matrix, or we could add a template parameter to each expression type (e.g., int Kind = ei_traits::Kind) allowing to specialize each expression for each kind of matrix. * Extend AutoDiffScalar to work with sparse vector expression for the derivatives. --- Eigen/Core | 1 + Eigen/Sparse | 1 + Eigen/src/Core/ExpressionMaker.h | 61 +++++++++++++ Eigen/src/Sparse/SparseExpressionMaker.h | 48 +++++++++++ .../Eigen/src/AutoDiff/AutoDiffJacobian.h | 4 +- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 85 ++++++++++--------- 6 files changed, 161 insertions(+), 39 deletions(-) create mode 100644 Eigen/src/Core/ExpressionMaker.h create mode 100644 Eigen/src/Sparse/SparseExpressionMaker.h diff --git a/Eigen/Core b/Eigen/Core index c8fcb1c09..3dce6422f 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -200,6 +200,7 @@ namespace Eigen { #include "src/Core/products/TriangularMatrixMatrix.h" #include "src/Core/products/TriangularSolverMatrix.h" #include "src/Core/BandMatrix.h" +#include "src/Core/ExpressionMaker.h" } // namespace Eigen diff --git a/Eigen/Sparse b/Eigen/Sparse index a8888daa3..96bd61419 100644 --- a/Eigen/Sparse +++ b/Eigen/Sparse @@ -110,6 +110,7 @@ namespace Eigen { #include "src/Sparse/SparseLLT.h" #include "src/Sparse/SparseLDLT.h" #include "src/Sparse/SparseLU.h" +#include "src/Sparse/SparseExpressionMaker.h" #ifdef EIGEN_CHOLMOD_SUPPORT # include "src/Sparse/CholmodSupport.h" diff --git a/Eigen/src/Core/ExpressionMaker.h b/Eigen/src/Core/ExpressionMaker.h new file mode 100644 index 000000000..1d265b63c --- /dev/null +++ b/Eigen/src/Core/ExpressionMaker.h @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_EXPRESSIONMAKER_H +#define EIGEN_EXPRESSIONMAKER_H + +// computes the shape of a matrix from its traits flag +template struct ei_shape_of +{ + enum { ret = ei_traits::Flags&SparseBit ? IsSparse : IsDense }; +}; + + +// Since the Sparse module is completely separated from the Core module, there is +// no way to write the type of a generic expression working for both dense and sparse +// matrix. Unless we change the overall design, here is a workaround. +// There is an example in unsuported/Eigen/src/AutoDiff/AutoDiffScalar. + +template::ret> +struct MakeNestByValue +{ + typedef NestByValue Type; +}; + +template::ret> +struct MakeCwiseUnaryOp +{ + typedef CwiseUnaryOp Type; +}; + +template::ret> +struct MakeCwiseBinaryOp +{ + typedef CwiseBinaryOp Type; +}; + +// TODO complete the list + + +#endif // EIGEN_EXPRESSIONMAKER_H diff --git a/Eigen/src/Sparse/SparseExpressionMaker.h b/Eigen/src/Sparse/SparseExpressionMaker.h new file mode 100644 index 000000000..1fdcbb1f2 --- /dev/null +++ b/Eigen/src/Sparse/SparseExpressionMaker.h @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_SPARSE_EXPRESSIONMAKER_H +#define EIGEN_SPARSE_EXPRESSIONMAKER_H + +template +struct MakeNestByValue +{ + typedef SparseNestByValue Type; +}; + +template +struct MakeCwiseUnaryOp +{ + typedef SparseCwiseUnaryOp Type; +}; + +template +struct MakeCwiseBinaryOp +{ + typedef SparseCwiseBinaryOp Type; +}; + +// TODO complete the list + +#endif // EIGEN_SPARSE_EXPRESSIONMAKER_H diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h index a5e881487..b3983f8a6 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h @@ -50,10 +50,12 @@ public: typedef typename Functor::InputType InputType; typedef typename Functor::ValueType ValueType; typedef typename Functor::JacobianType JacobianType; + typedef typename JacobianType::Scalar Scalar; - typedef Matrix DerivativeType; + typedef Matrix DerivativeType; typedef AutoDiffScalar ActiveScalar; + typedef Matrix ActiveInput; typedef Matrix ActiveValue; diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index fc5e237ab..2fb733a99 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -42,9 +42,17 @@ void ei_make_coherent(const A& a, const B&b) /** \class AutoDiffScalar * \brief A scalar type replacement with automatic differentation capability * - * \param DerType the vector type used to store/represent the derivatives (e.g. Vector3f) + * \param _DerType the vector type used to store/represent the derivatives. The base scalar type + * as well as the number of derivatives to compute are determined from this type. + * Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf + * if the number of derivatives is not known at compile time, and/or, the number + * of derivatives is large. + * Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap a + * existing vector into an AutoDiffScalar. + * Finally, _DerType can also be any Eigen compatible expression. * - * This class represents a scalar value while tracking its respective derivatives. + * This class represents a scalar value while tracking its respective derivatives using Eigen's expression + * template mechanism. * * It supports the following list of global math function: * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, @@ -56,10 +64,11 @@ void ei_make_coherent(const A& a, const B&b) * while derivatives are computed right away. * */ -template +template class AutoDiffScalar { public: + typedef typename ei_cleantype<_DerType>::type DerType; typedef typename ei_traits::Scalar Scalar; inline AutoDiffScalar() {} @@ -108,12 +117,12 @@ class AutoDiffScalar inline const DerType& derivatives() const { return m_derivatives; } inline DerType& derivatives() { return m_derivatives; } - inline const AutoDiffScalar operator+(const Scalar& other) const + inline const AutoDiffScalar operator+(const Scalar& other) const { return AutoDiffScalar(m_value + other, m_derivatives); } - friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) + friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) { return AutoDiffScalar(a + b.value(), b.derivatives()); } @@ -125,11 +134,11 @@ class AutoDiffScalar } template - inline const AutoDiffScalar,DerType,OtherDerType> > + inline const AutoDiffScalar,DerType,typename ei_cleantype::type>::Type > operator+(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar,DerType,OtherDerType> >( + return AutoDiffScalar,DerType,typename ei_cleantype::type>::Type >( m_value + other.value(), m_derivatives + other.derivatives()); } @@ -143,11 +152,11 @@ class AutoDiffScalar } template - inline const AutoDiffScalar, DerType,OtherDerType> > + inline const AutoDiffScalar, DerType,typename ei_cleantype::type>::Type > operator-(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, DerType,OtherDerType> >( + return AutoDiffScalar, DerType,typename ei_cleantype::type>::Type >( m_value - other.value(), m_derivatives - other.derivatives()); } @@ -161,73 +170,73 @@ class AutoDiffScalar } template - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator-() const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( -m_value, -m_derivatives); } - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator*(const Scalar& other) const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( m_value * other, (m_derivatives * other)); } - friend inline const AutoDiffScalar, DerType> > + friend inline const AutoDiffScalar, DerType>::Type > operator*(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( a.value() * other, a.derivatives() * other); } - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator/(const Scalar& other) const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( m_value / other, (m_derivatives * (Scalar(1)/other))); } - friend inline const AutoDiffScalar, DerType> > + friend inline const AutoDiffScalar, DerType>::Type > operator/(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( other / a.value(), a.derivatives() * (-Scalar(1)/other)); } template - inline const AutoDiffScalar, - NestByValue, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > > > + inline const AutoDiffScalar, + typename MakeNestByValue, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >::Type >::Type > operator/(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - NestByValue, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > > >( + return AutoDiffScalar, + typename MakeNestByValue, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >::Type >::Type >( m_value / other.value(), ((m_derivatives * other.value()).nestByValue() - (m_value * other.derivatives()).nestByValue()).nestByValue() * (Scalar(1)/(other.value()*other.value()))); } template - inline const AutoDiffScalar, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > + inline const AutoDiffScalar, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type > operator*(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > >( + return AutoDiffScalar, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >( m_value * other.value(), (m_derivatives * other.value()).nestByValue() + (m_value * other.derivatives()).nestByValue()); } @@ -299,11 +308,11 @@ struct ei_make_coherent_impl \ - inline const Eigen::AutoDiffScalar::Scalar>, DerType> > \ + inline const Eigen::AutoDiffScalar::Scalar>, DerType>::Type > \ FUNC(const Eigen::AutoDiffScalar& x) { \ using namespace Eigen; \ typedef typename ei_traits::Scalar Scalar; \ - typedef AutoDiffScalar, DerType> > ReturnType; \ + typedef AutoDiffScalar, DerType>::Type > ReturnType; \ CODE; \ } @@ -330,12 +339,12 @@ namespace std return ReturnType(std::log(x.value),x.derivatives() * (Scalar(1).x.value()));) template - inline const Eigen::AutoDiffScalar::Scalar>, DerType> > + inline const Eigen::AutoDiffScalar::Scalar>, DerType>::Type > pow(const Eigen::AutoDiffScalar& x, typename Eigen::ei_traits::Scalar y) { using namespace Eigen; typedef typename ei_traits::Scalar Scalar; - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } @@ -375,7 +384,7 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(ei_log, return ReturnType(ei_log(x.value),x.derivatives() * (Scalar(1).x.value()));) template -inline const AutoDiffScalar::Scalar>, DerType> > +inline const AutoDiffScalar::Scalar>, DerType>::Type > ei_pow(const AutoDiffScalar& x, typename ei_traits::Scalar y) { return std::pow(x,y);} From 5e3e6ff71a314b0a0ca569d6a7726757a39e9434 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 20 Oct 2009 22:08:13 +0200 Subject: [PATCH 21/34] Added Windows support to the BenchTimer. --- bench/BenchTimer.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index bfc3a99b3..c1f473597 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -26,8 +26,14 @@ #ifndef EIGEN_BENCH_TIMER_H #define EIGEN_BENCH_TIMER_H +#ifndef WIN32 #include #include +#else +#define NOMINMAX +#include +#endif + #include #include @@ -40,7 +46,15 @@ class BenchTimer { public: - BenchTimer() { reset(); } + BenchTimer() + { +#ifdef WIN32 + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + m_frequency = (double)freq.QuadPart; +#endif + reset(); + } ~BenchTimer() {} @@ -51,23 +65,35 @@ public: m_best = std::min(m_best, getTime() - m_start); } - /** Return the best elapsed time. + /** Return the best elapsed time in seconds. */ inline double value(void) { - return m_best; + return m_best; } +#ifdef WIN32 + inline double getTime(void) +#else static inline double getTime(void) +#endif { +#ifdef WIN32 + LARGE_INTEGER query_ticks; + QueryPerformanceCounter(&query_ticks); + return query_ticks.QuadPart/m_frequency; +#else struct timeval tv; struct timezone tz; gettimeofday(&tv, &tz); return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; +#endif } protected: - +#ifdef WIN32 + double m_frequency; +#endif double m_best, m_start; }; From 471b4d509234cbcbd4a1cd45d48fe10efcc2bcf1 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 20 Oct 2009 21:53:54 -0400 Subject: [PATCH 22/34] handle mark's first commits before he configured his id --- scripts/eigen_gen_credits.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/eigen_gen_credits.cpp b/scripts/eigen_gen_credits.cpp index 086548e26..d7a625d47 100644 --- a/scripts/eigen_gen_credits.cpp +++ b/scripts/eigen_gen_credits.cpp @@ -13,10 +13,24 @@ using namespace std; std::string contributor_name(const std::string& line) { string result; + + // let's first take care of the case of isolated email addresses, like + // "user@localhost.localdomain" entries + if(line.find("markb@localhost.localdomain") != string::npos) + { + return "Mark Borgerding"; + } + + // from there on we assume that we have a entry of the form + // either: + // Bla bli Blurp + // or: + // Bla bli Blurp + size_t position_of_email_address = line.find_first_of('<'); if(position_of_email_address != string::npos) { - // there is an e-mail address. + // there is an e-mail address in <...>. // Hauke once committed as "John Smith", fix that. if(line.find("hauke.heibel") != string::npos) @@ -29,7 +43,7 @@ std::string contributor_name(const std::string& line) } else { - // there is no e-mail address. + // there is no e-mail address in <...>. if(line.find("convert-repo") != string::npos) result = ""; From c3180b7ffbc98d69764b3c1ab17b36e289f7cf7e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 20 Oct 2009 23:25:49 -0400 Subject: [PATCH 23/34] MatrixBase: * support resize() to same size (nop). The case of FFT was another case where that make one's life far easier. hope that's ok with you Gael. but indeed, i don't use it in the ReturnByValue stuff. FFT: * Support MatrixBase (well, in the case with direct memory access such as Map) * adapt unit test --- Eigen/src/Core/MatrixBase.h | 19 +++++ Eigen/src/Core/util/StaticAssert.h | 3 +- unsupported/Eigen/FFT | 32 +++++++- unsupported/test/FFT.cpp | 127 ++++++++++++++++++++++------- 4 files changed, 148 insertions(+), 33 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 387c11385..0e8b49f75 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -190,6 +190,25 @@ template class MatrixBase * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */ int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); } + /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is + * Matrix::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + void resize(int size) + { + ei_assert(size == this->size() + && "MatrixBase::resize() does not actually allow to resize."); + } + /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is + * Matrix::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + void resize(int rows, int cols) + { + ei_assert(rows == this->rows() && cols == this->cols() + && "MatrixBase::resize() does not actually allow to resize."); + } + #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 883f2d95e..6210bc91c 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -78,7 +78,8 @@ INVALID_MATRIX_TEMPLATE_PARAMETERS, BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES }; }; diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index dc7e85908..fafdb829b 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -36,7 +36,7 @@ #define DEFAULT_FFT_IMPL ei_fftw_impl #endif -// intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form +// intel Math Kernel Library: fastest, commercial -- incompatible with Eigen in GPL form #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO // #include "src/FFT/ei_imkl_impl.h" @@ -70,6 +70,20 @@ class FFT fwd( &dst[0],&src[0],src.size() ); } + template + void fwd( MatrixBase & dst, const MatrixBase & src) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + dst.derived().resize( src.size() ); + fwd( &dst[0],&src[0],src.size() ); + } + template void inv( _Output * dst, const Complex * src, int nfft) { @@ -83,8 +97,24 @@ class FFT inv( &dst[0],&src[0],src.size() ); } + template + void inv( MatrixBase & dst, const MatrixBase & src) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + dst.derived().resize( src.size() ); + inv( &dst[0],&src[0],src.size() ); + } + // TODO: multi-dimensional FFTs + // TODO: handle Eigen MatrixBase + // ---> i added fwd and inv specializations above + unit test, is this enough? (bjacob) traits_type & traits() {return m_traits;} private: diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index f0b9b68bf..cc68f3718 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -39,16 +39,16 @@ complex promote(double x) { return complex( x); } complex promote(long double x) { return complex( x); } - template - long double fft_rmse( const vector & fftbuf,const vector & timebuf) + template + long double fft_rmse( const VectorType1 & fftbuf,const VectorType2 & timebuf) { long double totalpower=0; long double difpower=0; cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; - for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); - for (size_t k1=0;k1(0,k1*phinc) ); } totalpower += norm(acc); @@ -61,8 +61,8 @@ complex promote(long double x) { return complex( x); return sqrt(difpower/totalpower); } - template - long double dif_rmse( const vector buf1,const vector buf2) + template + long double dif_rmse( const VectorType1& buf1,const VectorType2& buf2) { long double totalpower=0; long double difpower=0; @@ -74,35 +74,59 @@ complex promote(long double x) { return complex( x); return sqrt(difpower/totalpower); } -template -void test_scalar(int nfft) +enum { StdVectorContainer, EigenVectorContainer }; + +template struct VectorType; + +template struct VectorType { - typedef typename Eigen::FFT::Complex Complex; - typedef typename Eigen::FFT::Scalar Scalar; + typedef vector type; +}; + +template struct VectorType +{ + typedef Matrix type; +}; + +template +void test_scalar_generic(int nfft) +{ + typedef typename FFT::Complex Complex; + typedef typename FFT::Scalar Scalar; + typedef typename VectorType::type ScalarVector; + typedef typename VectorType::type ComplexVector; FFT fft; - vector inbuf(nfft); - vector outbuf; + ScalarVector inbuf(nfft); + ComplexVector outbuf; for (int k=0;k() );// gross check - vector buf3; + ScalarVector buf3; fft.inv( buf3 , outbuf); VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } -template -void test_complex(int nfft) +template +void test_scalar(int nfft) { - typedef typename Eigen::FFT::Complex Complex; + test_scalar_generic(nfft); + test_scalar_generic(nfft); +} + +template +void test_complex_generic(int nfft) +{ + typedef typename FFT::Complex Complex; + typedef typename VectorType::type ComplexVector; FFT fft; - vector inbuf(nfft); - vector outbuf; - vector buf3; + ComplexVector inbuf(nfft); + ComplexVector outbuf; + ComplexVector buf3; for (int k=0;k() );// gross check } +template +void test_complex(int nfft) +{ + test_complex_generic(nfft); + test_complex_generic(nfft); +} + void test_FFT() { - CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); - CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); - CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); - CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); - CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); - CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); - CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(32) ); + + CALL_SUBTEST( test_complex(256) ); + CALL_SUBTEST( test_complex(256) ); + CALL_SUBTEST( test_complex(256) ); + + CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(3*8) ); + + CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(5*32) ); + + CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4) ); + + CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); + + CALL_SUBTEST( test_complex(2*3*4*5*7) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); - CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); - CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); - CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); - CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); - CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(32) ); + + CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(45) ); + + CALL_SUBTEST( test_scalar(50) ); + CALL_SUBTEST( test_scalar(50) ); + CALL_SUBTEST( test_scalar(50) ); + + CALL_SUBTEST( test_scalar(256) ); + CALL_SUBTEST( test_scalar(256) ); + CALL_SUBTEST( test_scalar(256) ); + + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); } From 85f8d1f0c65b3d0ce511b85a2847fec57637f4b6 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 21 Oct 2009 10:27:17 -0400 Subject: [PATCH 24/34] renamed 'Traits' to 'Impl', added vim modelines for syntax highlighting --- unsupported/Eigen/Complex | 2 ++ unsupported/Eigen/FFT | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex index a0483abdf..e1c41ab38 100644 --- a/unsupported/Eigen/Complex +++ b/unsupported/Eigen/Complex @@ -178,3 +178,5 @@ operator<< (std::basic_ostream& ostr, const Complex& rhs) } #endif +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index dc7e85908..1e96f4975 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -46,21 +46,21 @@ namespace Eigen { template + typename _Impl=DEFAULT_FFT_IMPL<_Scalar> > class FFT { public: - typedef _Traits traits_type; - typedef typename traits_type::Scalar Scalar; - typedef typename traits_type::Complex Complex; + typedef _Impl impl_type; + typedef typename impl_type::Scalar Scalar; + typedef typename impl_type::Complex Complex; - FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } + FFT(const impl_type & impl=impl_type() ) :m_impl(impl) { } template void fwd( Complex * dst, const _Input * src, int nfft) { - m_traits.fwd(dst,src,nfft); + m_impl.fwd(dst,src,nfft); } template @@ -73,7 +73,7 @@ class FFT template void inv( _Output * dst, const Complex * src, int nfft) { - m_traits.inv( dst,src,nfft ); + m_impl.inv( dst,src,nfft ); } template @@ -86,10 +86,11 @@ class FFT // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase - traits_type & traits() {return m_traits;} + impl_type & impl() {return m_impl;} private: - traits_type m_traits; + impl_type m_impl; }; #undef DEFAULT_FFT_IMPL } #endif +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ From e3d08443dc272f740447de0147efc69cf7de1c93 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 21 Oct 2009 20:53:05 -0400 Subject: [PATCH 25/34] inlining,all namespace declaration moved to FFT, removed preprocessor definitions, --- unsupported/Eigen/FFT | 41 +++++++++++++-------- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 38 ++++++++++++++++--- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 28 +++++++------- unsupported/test/CMakeLists.txt | 2 +- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 97ec8e49b..36afdde8d 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -25,29 +25,39 @@ #ifndef EIGEN_FFT_H #define EIGEN_FFT_H -// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft -#include "src/FFT/ei_kissfft_impl.h" -#define DEFAULT_FFT_IMPL ei_kissfft_impl +#include +#include +#include +#ifdef EIGEN_FFTW_DEFAULT // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -#ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines -#include "src/FFT/ei_fftw_impl.h" -#undef DEFAULT_FFT_IMPL -#define DEFAULT_FFT_IMPL ei_fftw_impl -#endif - -// intel Math Kernel Library: fastest, commercial -- incompatible with Eigen in GPL form -#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines +# include + namespace Eigen { +# include "src/FFT/ei_fftw_impl.h" + //template typedef struct ei_fftw_impl default_fft_impl; this does not work + template struct default_fft_impl : public ei_fftw_impl {}; + } +#elif defined EIGEN_MKL_DEFAULT // TODO -// #include "src/FFT/ei_imkl_impl.h" -// #define DEFAULT_FFT_IMPL ei_imkl_impl +// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form + namespace Eigen { +# include "src/FFT/ei_imklfft_impl.h" + template struct default_fft_impl : public ei_imklfft_impl {}; + } +#else +// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft +// + namespace Eigen { +# include "src/FFT/ei_kissfft_impl.h" + template + struct default_fft_impl : public ei_kissfft_impl {}; + } #endif namespace Eigen { template - > + typename _Impl=default_fft_impl<_Scalar> > class FFT { public: @@ -120,7 +130,6 @@ class FFT private: impl_type m_impl; }; -#undef DEFAULT_FFT_IMPL } #endif /* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h index d592bbb20..e1f67f334 100644 --- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -22,7 +22,8 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -namespace Eigen { + + // FFTW uses non-const arguments // so we must use ugly const_cast calls for all the args it uses // @@ -32,21 +33,25 @@ namespace Eigen { // 2. fftw_complex is compatible with std::complex // This assumes std::complex layout is array of size 2 with real,imag template + inline T * ei_fftw_cast(const T* p) { return const_cast( p); } + inline fftw_complex * ei_fftw_cast( const std::complex * p) - { + { return const_cast( reinterpret_cast(p) ); } + inline fftwf_complex * ei_fftw_cast( const std::complex * p) { return const_cast( reinterpret_cast(p) ); } + inline fftwl_complex * ei_fftw_cast( const std::complex * p) { return const_cast( reinterpret_cast(p) ); @@ -64,18 +69,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftwf_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftwf_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -91,18 +100,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftw_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftw_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftw_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -118,18 +131,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftwl_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftwl_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftwl_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -143,17 +160,20 @@ namespace Eigen { typedef _Scalar Scalar; typedef std::complex Complex; + inline void clear() { m_plans.clear(); } + inline void fwd( Complex * dst,const Complex *src,int nfft) { get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); } // real-to-complex forward FFT + inline void fwd( Complex * dst,const Scalar * src,int nfft) { get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); @@ -163,30 +183,37 @@ namespace Eigen { } // inverse complex-to-complex + inline void inv(Complex * dst,const Complex *src,int nfft) { get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + + //TODO move scaling to Eigen::FFT // scaling - Scalar s = 1./nfft; + Scalar s = Scalar(1.)/nfft; for (int k=0;k PlanData; typedef std::map PlanMap; PlanMap m_plans; + inline PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) { bool inplace = (dst==src); @@ -195,4 +222,3 @@ namespace Eigen { return m_plans[key]; } }; -} diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index a84ac68a0..c068d8765 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -22,11 +22,7 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -#include -#include -#include -namespace Eigen { // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft // Copyright 2003-2009 Mark Borgerding @@ -51,13 +47,6 @@ namespace Eigen { m_twiddles[i] = exp( Complex(0,i*phinc) ); } - void conjugate() - { - m_inverse = !m_inverse; - for ( size_t i=0;i + inline void fwd( Complex * dst,const _Src *src,int nfft) { get_plan(nfft,false).work(0, dst, src, 1,1); @@ -299,6 +294,7 @@ namespace Eigen { // perform two FFTs of src even and src odd // then twiddle to recombine them into the half-spectrum format // then fill in the conjugate symmetric half + inline void fwd( Complex * dst,const Scalar * src,int nfft) { if ( nfft&3 ) { @@ -334,6 +330,7 @@ namespace Eigen { } // inverse complex-to-complex + inline void inv(Complex * dst,const Complex *src,int nfft) { get_plan(nfft,true).work(0, dst, src, 1,1); @@ -341,6 +338,7 @@ namespace Eigen { } // half-complex to scalar + inline void inv( Scalar * dst,const Complex * src,int nfft) { if (nfft&3) { @@ -369,7 +367,7 @@ namespace Eigen { } } - private: + protected: typedef ei_kiss_cpx_fft PlanData; typedef std::map PlanMap; @@ -377,8 +375,10 @@ namespace Eigen { std::map > m_realTwiddles; std::vector m_tmpBuf; + inline int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + inline PlanData & get_plan(int nfft,bool inverse) { // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles @@ -390,6 +390,7 @@ namespace Eigen { return pd; } + inline Complex * real_twiddles(int ncfft2) { std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there @@ -403,10 +404,11 @@ namespace Eigen { return &twidref[0]; } + // TODO move scaling up into Eigen::FFT + inline void scale(Complex *dst,int n,Scalar s) { for (int k=0;k Date: Thu, 22 Oct 2009 10:11:26 +0200 Subject: [PATCH 26/34] demeaning with colwise expression --- Eigen/src/Geometry/Umeyama.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index 7652aa92e..d0f36d2c9 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -131,17 +131,11 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo const VectorType dst_mean = dst.rowwise().sum() * one_over_n; // demeaning of src and dst points - RowMajorMatrixType src_demean(m,n); - RowMajorMatrixType dst_demean(m,n); - for (int i=0; i Date: Thu, 22 Oct 2009 15:56:10 -0400 Subject: [PATCH 27/34] support gcc 3.3 --- test/geo_hyperplane.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp index f8281a16b..010989feb 100644 --- a/test/geo_hyperplane.cpp +++ b/test/geo_hyperplane.cpp @@ -121,7 +121,8 @@ template void lines() VERIFY_IS_APPROX(result, center); // check conversions between two types of lines - CoeffsType converted_coeffs = HLine(PLine(line_u)).coeffs(); + PLine pl(line_u); // gcc 3.3 will commit suicide if we don't name this variable + CoeffsType converted_coeffs = HLine(pl).coeffs(); converted_coeffs *= (line_u.coeffs()[0])/(converted_coeffs[0]); VERIFY(line_u.coeffs().isApprox(converted_coeffs)); } From a382963b04d4b847c6b0ef0611511fb6a8e11718 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 23 Oct 2009 14:26:14 +0200 Subject: [PATCH 28/34] * extend Map to allow the user to specify whether the mapped data is aligned or not. This is done using the Aligned constant: Map::Map(data); * rename ForceAligned to EnforceAlignedAccess, and update its doc, and emphasize this is mainly an internal stuff. --- Eigen/src/Core/Block.h | 18 +++++++-------- Eigen/src/Core/Map.h | 35 ++++++++++++++++------------- Eigen/src/Core/MapBase.h | 40 +++++++++++++++++++++++---------- Eigen/src/Core/StableNorm.h | 2 +- Eigen/src/Core/util/Constants.h | 4 ++-- test/map.cpp | 7 +++--- 6 files changed, 63 insertions(+), 43 deletions(-) diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index cebfeaf75..5fffdcb01 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -33,10 +33,10 @@ * \param MatrixType the type of the object in which we are taking a block * \param BlockRows the number of rows of the block we are taking at compile time (optional) * \param BlockCols the number of columns of the block we are taking at compile time (optional) - * \param _PacketAccess allows to enforce aligned loads and stores if set to \b ForceAligned. - * The default is \b AsRequested. This parameter is internaly used by Eigen - * in expressions such as \code mat.block() += other; \endcode and most of - * the time this is the only way it is used. + * \param _PacketAccess \internal used to enforce aligned loads in expressions such as + * \code mat.block() += other; \endcode. Possible values are + * \c AsRequested (default) and \c EnforceAlignedAccess. + * See class MapBase for more details. * \param _DirectAccessStatus \internal used for partial specialization * * This class represents an expression of either a fixed-size or dynamic-size block. It is the return @@ -84,9 +84,9 @@ struct ei_traits::CoeffReadCost, PacketAccess = _PacketAccess }; - typedef typename ei_meta_if&, - Block >::ret AlignedDerivedType; + Block >::ret AlignedDerivedType; }; template class Block @@ -228,13 +228,13 @@ class Block class InnerIterator; typedef typename ei_traits::AlignedDerivedType AlignedDerivedType; - friend class Block; + friend class Block; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) - AlignedDerivedType _convertToForceAligned() + AlignedDerivedType _convertToEnforceAlignedAccess() { - return Block + return Block (m_matrix, Base::m_data, Base::m_rows.value(), Base::m_cols.value()); } diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index f6bc814e2..dba7e20e4 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -31,16 +31,14 @@ * \brief A matrix or vector expression mapping an existing array of data. * * \param MatrixType the equivalent matrix type of the mapped data - * \param _PacketAccess allows to enforce aligned loads and stores if set to ForceAligned. - * The default is AsRequested. This parameter is internaly used by Eigen - * in expressions such as \code Map<...>(...) += other; \endcode and most - * of the time this is the only way it is used. + * \param PointerAlignment specifies whether the pointer is \c Aligned, or \c Unaligned. + * The default is \c Unaligned. * * This class represents a matrix or vector expression mapping an existing array of data. * It can be used to let Eigen interface without any overhead with non-Eigen data structures, * such as plain C arrays or structures from other libraries. * - * \b Tips: to change the array of data mapped by a Map object, you can use the C++ + * \b Tip: to change the array of data mapped by a Map object, you can use the C++ * placement new syntax: * * Example: \include Map_placement_new.cpp @@ -48,22 +46,27 @@ * * This class is the return type of Matrix::Map() but can also be used directly. * + * \b Note \b to \b Eigen \b developers: The template parameter \c PointerAlignment + * can also be or-ed with \c EnforceAlignedAccess in order to enforce aligned read + * in expressions such as \code A += B; \endcode. See class MapBase for further details. + * * \sa Matrix::Map() */ -template -struct ei_traits > : public ei_traits +template +struct ei_traits > : public ei_traits { enum { - PacketAccess = _PacketAccess, - Flags = ei_traits::Flags & ~AlignedBit + PacketAccess = Options & EnforceAlignedAccess, + Flags = (Options&Aligned)==Aligned ? ei_traits::Flags | AlignedBit + : ei_traits::Flags & ~AlignedBit }; - typedef typename ei_meta_if&, - Map >::ret AlignedDerivedType; + typedef typename ei_meta_if&, + Map >::ret AlignedDerivedType; }; -template class Map - : public MapBase > +template class Map + : public MapBase > { public: @@ -72,9 +75,9 @@ template class Map inline int stride() const { return this->innerSize(); } - AlignedDerivedType _convertToForceAligned() + AlignedDerivedType _convertToEnforceAlignedAccess() { - return Map(Base::m_data, Base::m_rows.value(), Base::m_cols.value()); + return AlignedDerivedType(Base::m_data, Base::m_rows.value(), Base::m_cols.value()); } inline Map(const Scalar* data) : Base(data) {} diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 88a3fac1e..8770732de 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -32,11 +32,17 @@ * * Expression classes inheriting MapBase must define the constant \c PacketAccess, * and type \c AlignedDerivedType in their respective ei_traits<> specialization structure. - * The value of \c PacketAccess can be either: - * - \b ForceAligned which enforces both aligned loads and stores - * - \b AsRequested which is the default behavior + * The value of \c PacketAccess can be either \b AsRequested, or set to \b EnforceAlignedAccess which + * enforces both aligned loads and stores. + * + * \c EnforceAlignedAccess is automatically set in expressions such as + * \code A += B; \endcode where A is either a Block or a Map. Here, + * this expression is transfomed into \code A = A_with_EnforceAlignedAccess + B; \endcode + * avoiding unaligned loads from A. Indeed, since Eigen's packet evaluation mechanism + * automatically align to the destination matrix, we know that loads to A will be aligned too. + * * The type \c AlignedDerivedType should correspond to the equivalent expression type - * with \c PacketAccess being \c ForceAligned. + * with \c PacketAccess set to \c EnforceAlignedAccess. * * \sa class Map, class Block */ @@ -79,19 +85,19 @@ template class MapBase * \sa MapBase::stride() */ inline const Scalar* data() const { return m_data; } - template struct force_aligned_impl { + template struct force_aligned_impl { static AlignedDerivedType run(MapBase& a) { return a.derived(); } }; template struct force_aligned_impl { - static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToForceAligned(); } + static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToEnforceAlignedAccess(); } }; /** \returns an expression equivalent to \c *this but having the \c PacketAccess constant - * set to \c ForceAligned. Must be reimplemented by the derived class. */ + * set to \c EnforceAlignedAccess. Must be reimplemented by the derived class. */ AlignedDerivedType forceAligned() { - return force_aligned_impl::run(*this); + return force_aligned_impl::run(*this); } inline const Scalar& coeff(int row, int col) const @@ -131,7 +137,7 @@ template class MapBase template inline PacketScalar packet(int row, int col) const { - return ei_ploadt + return ei_ploadt (m_data + (IsRowMajor ? col + row * stride() : row + col * stride())); } @@ -139,13 +145,13 @@ template class MapBase template inline PacketScalar packet(int index) const { - return ei_ploadt(m_data + index); + return ei_ploadt(m_data + index); } template inline void writePacket(int row, int col, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + (IsRowMajor ? col + row * stride() : row + col * stride()), x); } @@ -153,13 +159,14 @@ template class MapBase template inline void writePacket(int index, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + index, x); } inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + checkDataAlignment(); } inline MapBase(const Scalar* data, int size) @@ -170,6 +177,7 @@ template class MapBase EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) ei_assert(size >= 0); ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size); + checkDataAlignment(); } inline MapBase(const Scalar* data, int rows, int cols) @@ -178,6 +186,7 @@ template class MapBase ei_assert( (data == 0) || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); + checkDataAlignment(); } Derived& operator=(const MapBase& other) @@ -215,6 +224,13 @@ template class MapBase { return derived() = forceAligned() / other; } protected: + + void checkDataAlignment() const + { + ei_assert( ((!(ei_traits::Flags&AlignedBit)) + || ((std::size_t(m_data)&0xf)==0)) && "data is not aligned"); + } + const Scalar* EIGEN_RESTRICT m_data; const ei_int_if_dynamic m_rows; const ei_int_if_dynamic m_cols; diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index 06e69c448..f2d1e7240 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -59,7 +59,7 @@ MatrixBase::stableNorm() const RealScalar invScale = 1; RealScalar ssq = 0; // sum of square enum { - Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? ForceAligned : AsRequested + Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? EnforceAlignedAccess : AsRequested }; int n = size(); int bi=0; diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index affc1d478..169fb5aec 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -196,8 +196,8 @@ const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit; enum { DiagonalOnTheLeft, DiagonalOnTheRight }; -enum { Aligned, Unaligned }; -enum { ForceAligned, AsRequested }; +enum { Unaligned=0, Aligned=1 }; +enum { AsRequested=0, EnforceAlignedAccess=2 }; enum { ConditionalJumpCost = 5 }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; enum DirectionType { Vertical, Horizontal, BothDirections }; diff --git a/test/map.cpp b/test/map.cpp index 62e727304..fbff647f6 100644 --- a/test/map.cpp +++ b/test/map.cpp @@ -37,14 +37,15 @@ template void map_class(const VectorType& m) Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3; Map(array1, size) = VectorType::Random(size); - Map(array2, size) = Map(array1, size); + Map(array2, size) = Map(array1, size); Map(array3unaligned, size) = Map(array1, size); - VectorType ma1 = Map(array1, size); + VectorType ma1 = Map(array1, size); VectorType ma2 = Map(array2, size); VectorType ma3 = Map(array3unaligned, size); VERIFY_IS_APPROX(ma1, ma2); VERIFY_IS_APPROX(ma1, ma3); - + VERIFY_RAISES_ASSERT((Map(array3unaligned, size))); + ei_aligned_delete(array1, size); ei_aligned_delete(array2, size); delete[] array3; From 66fe5a5f36c80c64fb6078c7e7cb089057bff96f Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Sat, 24 Oct 2009 14:48:34 +0200 Subject: [PATCH 29/34] It is just not that easy and requires more work to get it done right. --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 706b30174..5ee17e91e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -256,7 +256,7 @@ using Eigen::ei_cos; // C++0x features #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && (_MSC_VER >= 1600)) - #define EIGEN_REF_TO_TEMPORARY && + #define EIGEN_REF_TO_TEMPORARY const & #else #define EIGEN_REF_TO_TEMPORARY const & #endif From 7cc9fb5d0adc1a4a2d515d6c42c92c93f80a8966 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 27 Oct 2009 15:29:12 +0100 Subject: [PATCH 30/34] Umeyama is now working with fixed size src and dst points. --- Eigen/src/Geometry/Umeyama.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index d0f36d2c9..551a69e5b 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -117,7 +117,7 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo enum { Dimension = EIGEN_ENUM_MIN(Derived::RowsAtCompileTime, OtherDerived::RowsAtCompileTime) }; typedef Matrix VectorType; - typedef typename ei_plain_matrix_type::type MatrixType; + typedef Matrix MatrixType; typedef typename ei_plain_matrix_type_row_major::type RowMajorMatrixType; const int m = src.rows(); // dimension From dbaba9019ba2ca116406f286a71a745179e61531 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 27 Oct 2009 15:57:21 +0100 Subject: [PATCH 31/34] Added more common typedefs. --- Eigen/src/Core/Matrix.h | 11 ++++++++++- Eigen/src/Geometry/Transform.h | 19 ++++++++++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 027e6bb70..cc2d71588 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -794,11 +794,20 @@ typedef Matrix Vector##SizeSuffix##TypeSuffix; \ /** \ingroup matrixtypedefs */ \ typedef Matrix RowVector##SizeSuffix##TypeSuffix; +#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##Size##X##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##X##Size##TypeSuffix; + #define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \ EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \ EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \ -EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4) EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i) EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index d03fd52fd..c49356361 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -480,6 +480,15 @@ typedef Transform Transform2d; /** \ingroup Geometry_Module */ typedef Transform Transform3d; +/** \ingroup Geometry_Module */ +typedef Transform Isometry2f; +/** \ingroup Geometry_Module */ +typedef Transform Isometry3f; +/** \ingroup Geometry_Module */ +typedef Transform Isometry2d; +/** \ingroup Geometry_Module */ +typedef Transform Isometry3d; + /** \ingroup Geometry_Module */ typedef Transform Affine2f; /** \ingroup Geometry_Module */ @@ -512,7 +521,7 @@ typedef Transform Projective3d; **************************/ #ifdef EIGEN_QT_SUPPORT -/** Initialises \c *this from a QMatrix assuming the dimension is 2. +/** Initializes \c *this from a QMatrix assuming the dimension is 2. * * This function is available only if the token EIGEN_QT_SUPPORT is defined. */ @@ -538,7 +547,7 @@ Transform& Transform::operator=(const QMatrix& /** \returns a QMatrix from \c *this assuming the dimension is 2. * - * \warning this convertion might loss data if \c *this is not affine + * \warning this conversion might loss data if \c *this is not affine * * This function is available only if the token EIGEN_QT_SUPPORT is defined. */ @@ -551,7 +560,7 @@ QMatrix Transform::toQMatrix(void) const matrix.coeff(0,2), matrix.coeff(1,2)); } -/** Initialises \c *this from a QTransform assuming the dimension is 2. +/** Initializes \c *this from a QTransform assuming the dimension is 2. * * This function is available only if the token EIGEN_QT_SUPPORT is defined. */ @@ -881,7 +890,7 @@ Transform::fromPositionOrientationScale(const MatrixBase > { }; /***************************************************** -*** Specializations of construct from matix *** +*** Specializations of construct from matrix *** *****************************************************/ template From 427f8a87d1c184e9fb0c5fdc62197be2a0abb910 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 27 Oct 2009 16:02:36 +0100 Subject: [PATCH 32/34] Added dox for the new typedefs. --- Eigen/src/Core/Matrix.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index cc2d71588..17d2f2836 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -58,6 +58,9 @@ template ) * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix) * + * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix) + * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix) + * * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs. * * You can access elements of vectors and matrices using normal subscripting: From 611d2b0b1d64880ff207368a901c7faba73a78c5 Mon Sep 17 00:00:00 2001 From: Mathieu Gautier Date: Tue, 27 Oct 2009 13:19:16 +0000 Subject: [PATCH 33/34] Quaternion could now map an array of 4 scalars : new classes : * QuaternionBase * Map --- Eigen/src/Core/util/ForwardDeclarations.h | 1 + Eigen/src/Geometry/Quaternion.h | 545 ++++++++++++++++++++++ Eigen/src/Geometry/arch/Geometry_SSE.h | 21 + 3 files changed, 567 insertions(+) diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 65e5ce687..025904734 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -129,6 +129,7 @@ template class PlanarRotation; // Geometry module: template class RotationBase; template class Cross; +template class QuaternionBase; template class Quaternion; template class Rotation2D; template class AngleAxis; diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 2f9f97807..f3f60a08a 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -507,4 +507,549 @@ struct ei_quaternion_assign_impl } }; +/*################################################################### + QuaternionBase and Map and Quat + ###################################################################*/ + +template +struct ei_quaternionbase_assign_impl; + +template class Quat; // [XXX] => remove when Quat becomes Quaternion + +template +struct ei_traits > +{ + typedef typename ei_traits::Scalar Scalar; + enum { + PacketAccess = ei_traits::PacketAccess + }; +}; + +template +class QuaternionBase : public RotationBase { + typedef RotationBase Base; +public: + using Base::operator*; + + typedef typename ei_traits >::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + // typedef typename Matrix Coefficients; + /** the type of a 3D vector */ + typedef Matrix Vector3; + /** the equivalent rotation matrix type */ + typedef Matrix Matrix3; + /** the equivalent angle-axis type */ + typedef AngleAxis AngleAxisType; + + /** \returns the \c x coefficient */ + inline Scalar x() const { return this->derived().coeffs().coeff(0); } + /** \returns the \c y coefficient */ + inline Scalar y() const { return this->derived().coeffs().coeff(1); } + /** \returns the \c z coefficient */ + inline Scalar z() const { return this->derived().coeffs().coeff(2); } + /** \returns the \c w coefficient */ + inline Scalar w() const { return this->derived().coeffs().coeff(3); } + + /** \returns a reference to the \c x coefficient */ + inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } + /** \returns a reference to the \c y coefficient */ + inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } + /** \returns a reference to the \c z coefficient */ + inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } + /** \returns a reference to the \c w coefficient */ + inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } + + /** \returns a read-only vector expression of the imaginary part (x,y,z) */ + inline const VectorBlock::Coefficients,3,1> vec() const { return this->derived().coeffs().template start<3>(); } + + /** \returns a vector expression of the imaginary part (x,y,z) */ + inline VectorBlock::Coefficients,3,1> vec() { return this->derived().coeffs().template start<3>(); } + + /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ + inline const typename ei_traits::Coefficients& coeffs() const { return this->derived().coeffs(); } + + /** \returns a vector expression of the coefficients (x,y,z,w) */ + inline typename ei_traits::Coefficients& coeffs() { return this->derived().coeffs(); } + + template QuaternionBase& operator=(const QuaternionBase& other); + QuaternionBase& operator=(const AngleAxisType& aa); + template + QuaternionBase& operator=(const MatrixBase& m); + + /** \returns a quaternion representing an identity rotation + * \sa MatrixBase::Identity() + */ + inline static Quat Identity() { return Quat(1, 0, 0, 0); } + + /** \sa Quaternion2::Identity(), MatrixBase::setIdentity() + */ + inline QuaternionBase& setIdentity() { coeffs() << 0, 0, 0, 1; return *this; } + + /** \returns the squared norm of the quaternion's coefficients + * \sa Quaternion2::norm(), MatrixBase::squaredNorm() + */ + inline Scalar squaredNorm() const { return coeffs().squaredNorm(); } + + /** \returns the norm of the quaternion's coefficients + * \sa Quaternion2::squaredNorm(), MatrixBase::norm() + */ + inline Scalar norm() const { return coeffs().norm(); } + + /** Normalizes the quaternion \c *this + * \sa normalized(), MatrixBase::normalize() */ + inline void normalize() { coeffs().normalize(); } + /** \returns a normalized version of \c *this + * \sa normalize(), MatrixBase::normalized() */ + inline Quat normalized() const { return Quat(coeffs().normalized()); } + + /** \returns the dot product of \c *this and \a other + * Geometrically speaking, the dot product of two unit quaternions + * corresponds to the cosine of half the angle between the two rotations. + * \sa angularDistance() + */ + template inline Scalar dot(const QuaternionBase& other) const { return coeffs().dot(other.coeffs()); } + + template inline Scalar angularDistance(const QuaternionBase& other) const; + + Matrix3 toRotationMatrix(void) const; + + template + QuaternionBase& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); + + template inline Quat operator* (const QuaternionBase& q) const; + template inline QuaternionBase& operator*= (const QuaternionBase& q); + + Quat inverse(void) const; + Quat conjugate(void) const; + + template Quat slerp(Scalar t, const QuaternionBase& other) const; + + /** \returns \c true if \c *this is approximately equal to \a other, within the precision + * determined by \a prec. + * + * \sa MatrixBase::isApprox() */ + bool isApprox(const QuaternionBase& other, typename RealScalar prec = precision()) const + { return coeffs().isApprox(other.coeffs(), prec); } + + Vector3 _transformVector(Vector3 v) const; + +}; + +/* ########### Quat -> Quaternion */ + +template +struct ei_traits > +{ + typedef _Scalar Scalar; + typedef Matrix<_Scalar,4,1> Coefficients; + enum{ + PacketAccess = Aligned + }; +}; + +template +class Quat : public QuaternionBase >{ + typedef QuaternionBase > Base; +public: + using Base::operator=; + + typedef _Scalar Scalar; + + typedef typename ei_traits >::Coefficients Coefficients; + typedef typename Base::AngleAxisType AngleAxisType; + + /** Default constructor leaving the quaternion uninitialized. */ + inline Quat() {} + + /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from + * its four coefficients \a w, \a x, \a y and \a z. + * + * \warning Note the order of the arguments: the real \a w coefficient first, + * while internally the coefficients are stored in the following order: + * [\c x, \c y, \c z, \c w] + */ + inline Quat(Scalar w, Scalar x, Scalar y, Scalar z) + { coeffs() << x, y, z, w; } + + /** Constructs and initialize a quaternion from the array data + * This constructor is also used to map an array */ + inline Quat(const Scalar* data) : m_coeffs(data) {} + + /** Copy constructor */ +// template inline Quat(const QuaternionBase& other) { m_coeffs = other.coeffs(); } [XXX] redundant with 703 + + /** Constructs and initializes a quaternion from the angle-axis \a aa */ + explicit inline Quat(const AngleAxisType& aa) { *this = aa; } + + /** Constructs and initializes a quaternion from either: + * - a rotation matrix expression, + * - a 4D vector expression representing quaternion coefficients. + */ + template + explicit inline Quat(const MatrixBase& other) { *this = other; } + + /** \returns \c *this with scalar type casted to \a NewScalarType + * + * Note that if \a NewScalarType is equal to the current scalar type of \c *this + * then this function smartly returns a const reference to \c *this. + */ + template + inline typename ei_cast_return_type >::type cast() const + { return typename ei_cast_return_type >::type(*this); } + + /** Copy constructor with scalar type conversion */ + template + inline explicit Quat(const QuaternionBase& other) + { m_coeffs = other.coeffs().template cast(); } + + inline Coefficients& coeffs() { return m_coeffs;} + inline const Coefficients& coeffs() const { return m_coeffs;} + +protected: + Coefficients m_coeffs; +}; + +/* ########### Map */ + +/** \class Map + * \nonstableyet + * + * \brief Expression of a quaternion + * + * \param Scalar the type of the vector of diagonal coefficients + * + * \sa class Quaternion, class QuaternionBase + */ +template +struct ei_traits, _PacketAccess> >: +ei_traits > +{ + typedef _Scalar Scalar; + typedef Map > Coefficients; + enum { + PacketAccess = _PacketAccess + }; +}; + +template +class Map, PacketAccess > : public QuaternionBase, PacketAccess> >, ei_no_assignment_operator { + public: + + typedef _Scalar Scalar; + + typedef typename ei_traits, PacketAccess> >::Coefficients Coefficients; + + inline Map, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {} + + inline Coefficients& coeffs() { return m_coeffs;} + inline const Coefficients& coeffs() const { return m_coeffs;} + + protected: + Coefficients m_coeffs; +}; + +typedef Map > QuaternionMapd; +typedef Map > QuaternionMapf; +typedef Map, Aligned> QuaternionMapAlignedd; +typedef Map, Aligned> QuaternionMapAlignedf; + +// Generic Quaternion * Quaternion product +template struct ei_quat_product +{ + inline static Quat run(const QuaternionBase& a, const QuaternionBase& b){ + return Quat + ( + a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), + a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), + a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(), + a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x() + ); + } +}; + +/** \returns the concatenation of two rotations as a quaternion-quaternion product */ +template +template +inline Quat >::Scalar> QuaternionBase::operator* (const QuaternionBase& other) const +{ + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + return ei_quat_product::Scalar, + ei_traits::PacketAccess && ei_traits::PacketAccess>::run(*this, other); +} + +/** \sa operator*(Quaternion) */ +template +template +inline QuaternionBase& QuaternionBase::operator*= (const QuaternionBase& other) +{ + return (*this = *this * other); +} + +/** Rotation of a vector by a quaternion. + * \remarks If the quaternion is used to rotate several points (>1) + * then it is much more efficient to first convert it to a 3x3 Matrix. + * Comparison of the operation cost for n transformations: + * - Quaternion2: 30n + * - Via a Matrix3: 24 + 15n + */ +template +inline typename QuaternionBase::Vector3 +QuaternionBase::_transformVector(Vector3 v) const +{ + // Note that this algorithm comes from the optimization by hand + // of the conversion to a Matrix followed by a Matrix/Vector product. + // It appears to be much faster than the common algorithm found + // in the litterature (30 versus 39 flops). It also requires two + // Vector3 as temporaries. + Vector3 uv = Scalar(2) * this->vec().cross(v); + return v + this->w() * uv + this->vec().cross(uv); +} + +template +template +inline QuaternionBase& QuaternionBase::operator=(const QuaternionBase& other) +{ + coeffs() = other.coeffs(); + return *this; +} + +/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this + */ +template +inline QuaternionBase& QuaternionBase::operator=(const AngleAxisType& aa) +{ + Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings + this->w() = ei_cos(ha); + this->vec() = ei_sin(ha) * aa.axis(); + return *this; +} + +/** Set \c *this from the expression \a xpr: + * - if \a xpr is a 4x1 vector, then \a xpr is assumed to be a quaternion + * - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix + * and \a xpr is converted to a quaternion + */ + +template +template +inline QuaternionBase& QuaternionBase::operator=(const MatrixBase& xpr) +{ + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + ei_quaternionbase_assign_impl::run(*this, xpr.derived()); + return *this; +} + +/** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to + * be normalized, otherwise the result is undefined. + */ +template +inline typename QuaternionBase::Matrix3 +QuaternionBase::toRotationMatrix(void) const +{ + // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!) + // if not inlined then the cost of the return by value is huge ~ +35%, + // however, not inlining this function is an order of magnitude slower, so + // it has to be inlined, and so the return by value is not an issue + Matrix3 res; + + const Scalar tx = 2*this->x(); + const Scalar ty = 2*this->y(); + const Scalar tz = 2*this->z(); + const Scalar twx = tx*this->w(); + const Scalar twy = ty*this->w(); + const Scalar twz = tz*this->w(); + const Scalar txx = tx*this->x(); + const Scalar txy = ty*this->x(); + const Scalar txz = tz*this->x(); + const Scalar tyy = ty*this->y(); + const Scalar tyz = tz*this->y(); + const Scalar tzz = tz*this->z(); + + res.coeffRef(0,0) = 1-(tyy+tzz); + res.coeffRef(0,1) = txy-twz; + res.coeffRef(0,2) = txz+twy; + res.coeffRef(1,0) = txy+twz; + res.coeffRef(1,1) = 1-(txx+tzz); + res.coeffRef(1,2) = tyz-twx; + res.coeffRef(2,0) = txz-twy; + res.coeffRef(2,1) = tyz+twx; + res.coeffRef(2,2) = 1-(txx+tyy); + + return res; +} + +/** Sets \c *this to be a quaternion representing a rotation between + * the two arbitrary vectors \a a and \a b. In other words, the built + * rotation represent a rotation sending the line of direction \a a + * to the line of direction \a b, both lines passing through the origin. + * + * \returns a reference to \c *this. + * + * Note that the two input vectors do \b not have to be normalized, and + * do not need to have the same norm. + */ +template +template +inline QuaternionBase& QuaternionBase::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) +{ + Vector3 v0 = a.normalized(); + Vector3 v1 = b.normalized(); + Scalar c = v1.dot(v0); + + // if dot == -1, vectors are nearly opposites + // => accuraletly compute the rotation axis by computing the + // intersection of the two planes. This is done by solving: + // x^T v0 = 0 + // x^T v1 = 0 + // under the constraint: + // ||x|| = 1 + // which yields a singular value problem + if (c < Scalar(-1)+precision()) + { + c = std::max(c,-1); + Matrix m; m << v0.transpose(), v1.transpose(); + SVD > svd(m); + Vector3 axis = svd.matrixV().col(2); + + Scalar w2 = (Scalar(1)+c)*Scalar(0.5); + this->w() = ei_sqrt(w2); + this->vec() = axis * ei_sqrt(Scalar(1) - w2); + return *this; + } + Vector3 axis = v0.cross(v1); + Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2)); + Scalar invs = Scalar(1)/s; + this->vec() = axis * invs; + this->w() = s * Scalar(0.5); + + return *this; +} + +/** \returns the multiplicative inverse of \c *this + * Note that in most cases, i.e., if you simply want the opposite rotation, + * and/or the quaternion is normalized, then it is enough to use the conjugate. + * + * \sa Quaternion2::conjugate() + */ +template +inline Quat >::Scalar> QuaternionBase::inverse() const +{ + // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? + Scalar n2 = this->squaredNorm(); + if (n2 > 0) + return Quat(conjugate().coeffs() / n2); + else + { + // return an invalid result to flag the error + return Quat(ei_traits::Coefficients::Zero()); + } +} + +/** \returns the conjugate of the \c *this which is equal to the multiplicative inverse + * if the quaternion is normalized. + * The conjugate of a quaternion represents the opposite rotation. + * + * \sa Quaternion2::inverse() + */ +template +inline Quat >::Scalar> QuaternionBase::conjugate() const +{ + return Quat(this->w(),-this->x(),-this->y(),-this->z()); +} + +/** \returns the angle (in radian) between two rotations + * \sa dot() + */ +template +template +inline typename ei_traits >::Scalar QuaternionBase::angularDistance(const QuaternionBase& other) const +{ + double d = ei_abs(this->dot(other)); + if (d>=1.0) + return 0; + return Scalar(2) * std::acos(d); +} + +/** \returns the spherical linear interpolation between the two quaternions + * \c *this and \a other at the parameter \a t + */ +template +template +Quat >::Scalar> QuaternionBase::slerp(Scalar t, const QuaternionBase& other) const +{ + static const Scalar one = Scalar(1) - precision(); + Scalar d = this->dot(other); + Scalar absD = ei_abs(d); + if (absD>=one) + return Quat(*this); + + // theta is the angle between the 2 quaternions + Scalar theta = std::acos(absD); + Scalar sinTheta = ei_sin(theta); + + Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; + Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta; + if (d<0) + scale1 = -scale1; + + return Quat(scale0 * coeffs() + scale1 * other.coeffs()); +} + +// set from a rotation matrix +template +struct ei_quaternionbase_assign_impl +{ + typedef typename Other::Scalar Scalar; + template inline static void run(QuaternionBase& q, const Other& mat) + { + // This algorithm comes from "Quaternion Calculus and Fast Animation", + // Ken Shoemake, 1987 SIGGRAPH course notes + Scalar t = mat.trace(); + if (t > 0) + { + t = ei_sqrt(t + Scalar(1.0)); + q.w() = Scalar(0.5)*t; + t = Scalar(0.5)/t; + q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t; + q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t; + q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t; + } + else + { + int i = 0; + if (mat.coeff(1,1) > mat.coeff(0,0)) + i = 1; + if (mat.coeff(2,2) > mat.coeff(i,i)) + i = 2; + int j = (i+1)%3; + int k = (j+1)%3; + + t = ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0)); + q.coeffs().coeffRef(i) = Scalar(0.5) * t; + t = Scalar(0.5)/t; + q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t; + q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t; + q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t; + } + } +}; + +// set from a vector of coefficients assumed to be a quaternion +template +struct ei_quaternionbase_assign_impl +{ + typedef typename Other::Scalar Scalar; + template inline static void run(QuaternionBase& q, const Other& vec) + { + q.coeffs() = vec; + } +}; + + #endif // EIGEN_QUATERNION_H diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index d0342febc..c608e4843 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -45,6 +45,27 @@ ei_quaternion_product(const Quaternion& _a, const Quate return res; } +template struct ei_quat_product +{ + inline static Quat run(const QuaternionBase& _a, const QuaternionBase& _b) + { + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); + Quat res; + __m128 a = _a.coeffs().packet(0); + __m128 b = _b.coeffs().packet(0); + __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2), + ei_vec4f_swizzle1(b,2,0,1,2)),mask); + __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1), + ei_vec4f_swizzle1(b,0,1,2,1)),mask); + ei_pstore(&res.x(), + _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)), + _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0), + ei_vec4f_swizzle1(b,1,2,0,0))), + _mm_add_ps(flip1,flip2))); + return res; + } +}; + template struct ei_cross3_impl { inline static typename ei_plain_matrix_type::type From 6219f9acfa61e54baf266f816b7eaf9ffbd9841e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 27 Oct 2009 15:45:24 -0400 Subject: [PATCH 34/34] * rename new Quat class to Quaternion, remove existing Quaternion * add Copyright line for Mathieu * cast() was broken (compile errors) and needed anyway to be in QuaternionBase * it's VectorBlock, don't pass additional parameter 1, it has different meaning!! * make it compile with GCC (put 'typename' at the right location) --- Eigen/src/Geometry/Quaternion.h | 578 +++---------------------- Eigen/src/Geometry/arch/Geometry_SSE.h | 23 +- 2 files changed, 60 insertions(+), 541 deletions(-) diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index f3f60a08a..67b040165 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2009 Mathieu Gautier // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,11 +26,6 @@ #ifndef EIGEN_QUATERNION_H #define EIGEN_QUATERNION_H -template -struct ei_quaternion_assign_impl; - /** \geometry_module \ingroup Geometry_Module * * \class Quaternion @@ -52,471 +48,12 @@ struct ei_quaternion_assign_impl; * \sa class AngleAxis, class Transform */ -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class Quaternion : public RotationBase,3> -{ - typedef RotationBase,3> Base; - - - -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,4) - - using Base::operator*; - - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - - /** the type of the Coefficients 4-vector */ - typedef Matrix Coefficients; - /** the type of a 3D vector */ - typedef Matrix Vector3; - /** the equivalent rotation matrix type */ - typedef Matrix Matrix3; - /** the equivalent angle-axis type */ - typedef AngleAxis AngleAxisType; - - /** \returns the \c x coefficient */ - inline Scalar x() const { return m_coeffs.coeff(0); } - /** \returns the \c y coefficient */ - inline Scalar y() const { return m_coeffs.coeff(1); } - /** \returns the \c z coefficient */ - inline Scalar z() const { return m_coeffs.coeff(2); } - /** \returns the \c w coefficient */ - inline Scalar w() const { return m_coeffs.coeff(3); } - - /** \returns a reference to the \c x coefficient */ - inline Scalar& x() { return m_coeffs.coeffRef(0); } - /** \returns a reference to the \c y coefficient */ - inline Scalar& y() { return m_coeffs.coeffRef(1); } - /** \returns a reference to the \c z coefficient */ - inline Scalar& z() { return m_coeffs.coeffRef(2); } - /** \returns a reference to the \c w coefficient */ - inline Scalar& w() { return m_coeffs.coeffRef(3); } - - /** \returns a read-only vector expression of the imaginary part (x,y,z) */ - inline const Block vec() const { return m_coeffs.template start<3>(); } - - /** \returns a vector expression of the imaginary part (x,y,z) */ - inline Block vec() { return m_coeffs.template start<3>(); } - - /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ - inline const Coefficients& coeffs() const { return m_coeffs; } - - /** \returns a vector expression of the coefficients (x,y,z,w) */ - inline Coefficients& coeffs() { return m_coeffs; } - - /** Default constructor leaving the quaternion uninitialized. */ - inline Quaternion() {} - - /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from - * its four coefficients \a w, \a x, \a y and \a z. - * - * \warning Note the order of the arguments: the real \a w coefficient first, - * while internally the coefficients are stored in the following order: - * [\c x, \c y, \c z, \c w] - */ - inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z) - { m_coeffs << x, y, z, w; } - - /** Copy constructor */ - inline Quaternion(const Quaternion& other) { m_coeffs = other.m_coeffs; } - - /** Constructs and initializes a quaternion from the angle-axis \a aa */ - explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } - - /** Constructs and initializes a quaternion from either: - * - a rotation matrix expression, - * - a 4D vector expression representing quaternion coefficients. - * \sa operator=(MatrixBase) - */ - template - explicit inline Quaternion(const MatrixBase& other) { *this = other; } - - Quaternion& operator=(const Quaternion& other); - Quaternion& operator=(const AngleAxisType& aa); - template - Quaternion& operator=(const MatrixBase& m); - - /** \returns a quaternion representing an identity rotation - * \sa MatrixBase::Identity() - */ - inline static Quaternion Identity() { return Quaternion(1, 0, 0, 0); } - - /** \sa Quaternion::Identity(), MatrixBase::setIdentity() - */ - inline Quaternion& setIdentity() { m_coeffs << 0, 0, 0, 1; return *this; } - - /** \returns the squared norm of the quaternion's coefficients - * \sa Quaternion::norm(), MatrixBase::squaredNorm() - */ - inline Scalar squaredNorm() const { return m_coeffs.squaredNorm(); } - - /** \returns the norm of the quaternion's coefficients - * \sa Quaternion::squaredNorm(), MatrixBase::norm() - */ - inline Scalar norm() const { return m_coeffs.norm(); } - - /** Normalizes the quaternion \c *this - * \sa normalized(), MatrixBase::normalize() */ - inline void normalize() { m_coeffs.normalize(); } - /** \returns a normalized version of \c *this - * \sa normalize(), MatrixBase::normalized() */ - inline Quaternion normalized() const { return Quaternion(m_coeffs.normalized()); } - - /** \returns the dot product of \c *this and \a other - * Geometrically speaking, the dot product of two unit quaternions - * corresponds to the cosine of half the angle between the two rotations. - * \sa angularDistance() - */ - inline Scalar dot(const Quaternion& other) const { return m_coeffs.dot(other.m_coeffs); } - - inline Scalar angularDistance(const Quaternion& other) const; - - Matrix3 toRotationMatrix(void) const; - - template - Quaternion& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); - - inline Quaternion operator* (const Quaternion& q) const; - inline Quaternion& operator*= (const Quaternion& q); - - Quaternion inverse(void) const; - Quaternion conjugate(void) const; - - Quaternion slerp(Scalar t, const Quaternion& other) const; - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename ei_cast_return_type >::type cast() const - { return typename ei_cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Quaternion(const Quaternion& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Quaternion& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - - Vector3 _transformVector(Vector3 v) const; - -protected: - Coefficients m_coeffs; -}; - -/** \ingroup Geometry_Module - * single precision quaternion type */ -typedef Quaternion Quaternionf; -/** \ingroup Geometry_Module - * double precision quaternion type */ -typedef Quaternion Quaterniond; - -// Generic Quaternion * Quaternion product -template inline Quaternion -ei_quaternion_product(const Quaternion& a, const Quaternion& b) -{ - return Quaternion - ( - a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), - a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), - a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(), - a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x() - ); -} - -/** \returns the concatenation of two rotations as a quaternion-quaternion product */ -template -inline Quaternion Quaternion::operator* (const Quaternion& other) const -{ - return ei_quaternion_product(*this,other); -} - -/** \sa operator*(Quaternion) */ -template -inline Quaternion& Quaternion::operator*= (const Quaternion& other) -{ - return (*this = *this * other); -} - -/** Rotation of a vector by a quaternion. - * \remarks If the quaternion is used to rotate several points (>1) - * then it is much more efficient to first convert it to a 3x3 Matrix. - * Comparison of the operation cost for n transformations: - * - Quaternion: 30n - * - Via a Matrix3: 24 + 15n - */ -template -inline typename Quaternion::Vector3 -Quaternion::_transformVector(Vector3 v) const -{ - // Note that this algorithm comes from the optimization by hand - // of the conversion to a Matrix followed by a Matrix/Vector product. - // It appears to be much faster than the common algorithm found - // in the litterature (30 versus 39 flops). It also requires two - // Vector3 as temporaries. - Vector3 uv = Scalar(2) * this->vec().cross(v); - return v + this->w() * uv + this->vec().cross(uv); -} - -template -inline Quaternion& Quaternion::operator=(const Quaternion& other) -{ - m_coeffs = other.m_coeffs; - return *this; -} - -/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this - */ -template -inline Quaternion& Quaternion::operator=(const AngleAxisType& aa) -{ - Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings - this->w() = ei_cos(ha); - this->vec() = ei_sin(ha) * aa.axis(); - return *this; -} - -/** Set \c *this from the expression \a xpr: - * - if \a xpr is a 4x1 vector, then \a xpr is assumed to be a quaternion - * - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix - * and \a xpr is converted to a quaternion - */ -template -template -inline Quaternion& Quaternion::operator=(const MatrixBase& xpr) -{ - ei_quaternion_assign_impl::run(*this, xpr.derived()); - return *this; -} - -/** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to - * be normalized, otherwise the result is undefined. - */ -template -inline typename Quaternion::Matrix3 -Quaternion::toRotationMatrix(void) const -{ - // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!) - // if not inlined then the cost of the return by value is huge ~ +35%, - // however, not inlining this function is an order of magnitude slower, so - // it has to be inlined, and so the return by value is not an issue - Matrix3 res; - - const Scalar tx = 2*this->x(); - const Scalar ty = 2*this->y(); - const Scalar tz = 2*this->z(); - const Scalar twx = tx*this->w(); - const Scalar twy = ty*this->w(); - const Scalar twz = tz*this->w(); - const Scalar txx = tx*this->x(); - const Scalar txy = ty*this->x(); - const Scalar txz = tz*this->x(); - const Scalar tyy = ty*this->y(); - const Scalar tyz = tz*this->y(); - const Scalar tzz = tz*this->z(); - - res.coeffRef(0,0) = 1-(tyy+tzz); - res.coeffRef(0,1) = txy-twz; - res.coeffRef(0,2) = txz+twy; - res.coeffRef(1,0) = txy+twz; - res.coeffRef(1,1) = 1-(txx+tzz); - res.coeffRef(1,2) = tyz-twx; - res.coeffRef(2,0) = txz-twy; - res.coeffRef(2,1) = tyz+twx; - res.coeffRef(2,2) = 1-(txx+tyy); - - return res; -} - -/** Sets \c *this to be a quaternion representing a rotation between - * the two arbitrary vectors \a a and \a b. In other words, the built - * rotation represent a rotation sending the line of direction \a a - * to the line of direction \a b, both lines passing through the origin. - * - * \returns a reference to \c *this. - * - * Note that the two input vectors do \b not have to be normalized, and - * do not need to have the same norm. - */ -template -template -inline Quaternion& Quaternion::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) -{ - Vector3 v0 = a.normalized(); - Vector3 v1 = b.normalized(); - Scalar c = v1.dot(v0); - - // if dot == -1, vectors are nearly opposites - // => accuraletly compute the rotation axis by computing the - // intersection of the two planes. This is done by solving: - // x^T v0 = 0 - // x^T v1 = 0 - // under the constraint: - // ||x|| = 1 - // which yields a singular value problem - if (c < Scalar(-1)+precision()) - { - c = std::max(c,-1); - Matrix m; m << v0.transpose(), v1.transpose(); - SVD > svd(m); - Vector3 axis = svd.matrixV().col(2); - - Scalar w2 = (Scalar(1)+c)*Scalar(0.5); - this->w() = ei_sqrt(w2); - this->vec() = axis * ei_sqrt(Scalar(1) - w2); - return *this; - } - Vector3 axis = v0.cross(v1); - Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2)); - Scalar invs = Scalar(1)/s; - this->vec() = axis * invs; - this->w() = s * Scalar(0.5); - - return *this; -} - -/** \returns the multiplicative inverse of \c *this - * Note that in most cases, i.e., if you simply want the opposite rotation, - * and/or the quaternion is normalized, then it is enough to use the conjugate. - * - * \sa Quaternion::conjugate() - */ -template -inline Quaternion Quaternion::inverse() const -{ - // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? - Scalar n2 = this->squaredNorm(); - if (n2 > 0) - return Quaternion(conjugate().coeffs() / n2); - else - { - // return an invalid result to flag the error - return Quaternion(Coefficients::Zero()); - } -} - -/** \returns the conjugate of the \c *this which is equal to the multiplicative inverse - * if the quaternion is normalized. - * The conjugate of a quaternion represents the opposite rotation. - * - * \sa Quaternion::inverse() - */ -template -inline Quaternion Quaternion::conjugate() const -{ - return Quaternion(this->w(),-this->x(),-this->y(),-this->z()); -} - -/** \returns the angle (in radian) between two rotations - * \sa dot() - */ -template -inline Scalar Quaternion::angularDistance(const Quaternion& other) const -{ - double d = ei_abs(this->dot(other)); - if (d>=1.0) - return 0; - return Scalar(2) * std::acos(d); -} - -/** \returns the spherical linear interpolation between the two quaternions - * \c *this and \a other at the parameter \a t - */ -template -Quaternion Quaternion::slerp(Scalar t, const Quaternion& other) const -{ - static const Scalar one = Scalar(1) - precision(); - Scalar d = this->dot(other); - Scalar absD = ei_abs(d); - if (absD>=one) - return *this; - - // theta is the angle between the 2 quaternions - Scalar theta = std::acos(absD); - Scalar sinTheta = ei_sin(theta); - - Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; - Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta; - if (d<0) - scale1 = -scale1; - - return Quaternion(scale0 * m_coeffs + scale1 * other.m_coeffs); -} - -// set from a rotation matrix -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - inline static void run(Quaternion& q, const Other& mat) - { - // This algorithm comes from "Quaternion Calculus and Fast Animation", - // Ken Shoemake, 1987 SIGGRAPH course notes - Scalar t = mat.trace(); - if (t > 0) - { - t = ei_sqrt(t + Scalar(1.0)); - q.w() = Scalar(0.5)*t; - t = Scalar(0.5)/t; - q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t; - q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t; - q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t; - } - else - { - int i = 0; - if (mat.coeff(1,1) > mat.coeff(0,0)) - i = 1; - if (mat.coeff(2,2) > mat.coeff(i,i)) - i = 2; - int j = (i+1)%3; - int k = (j+1)%3; - - t = ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0)); - q.coeffs().coeffRef(i) = Scalar(0.5) * t; - t = Scalar(0.5)/t; - q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t; - q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t; - q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t; - } - } -}; - -// set from a vector of coefficients assumed to be a quaternion -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - inline static void run(Quaternion& q, const Other& vec) - { - q.coeffs() = vec; - } -}; - -/*################################################################### - QuaternionBase and Map and Quat - ###################################################################*/ - template struct ei_quaternionbase_assign_impl; -template class Quat; // [XXX] => remove when Quat becomes Quaternion +template class Quaternion; // [XXX] => remove when Quaternion becomes Quaternion template struct ei_traits > @@ -528,7 +65,8 @@ struct ei_traits > }; template -class QuaternionBase : public RotationBase { +class QuaternionBase : public RotationBase +{ typedef RotationBase Base; public: using Base::operator*; @@ -563,10 +101,10 @@ public: inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } /** \returns a read-only vector expression of the imaginary part (x,y,z) */ - inline const VectorBlock::Coefficients,3,1> vec() const { return this->derived().coeffs().template start<3>(); } + inline const VectorBlock::Coefficients,3> vec() const { return this->derived().coeffs().template start<3>(); } /** \returns a vector expression of the imaginary part (x,y,z) */ - inline VectorBlock::Coefficients,3,1> vec() { return this->derived().coeffs().template start<3>(); } + inline VectorBlock::Coefficients,3> vec() { return this->derived().coeffs().template start<3>(); } /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ inline const typename ei_traits::Coefficients& coeffs() const { return this->derived().coeffs(); } @@ -582,7 +120,7 @@ public: /** \returns a quaternion representing an identity rotation * \sa MatrixBase::Identity() */ - inline static Quat Identity() { return Quat(1, 0, 0, 0); } + inline static Quaternion Identity() { return Quaternion(1, 0, 0, 0); } /** \sa Quaternion2::Identity(), MatrixBase::setIdentity() */ @@ -603,7 +141,7 @@ public: inline void normalize() { coeffs().normalize(); } /** \returns a normalized version of \c *this * \sa normalize(), MatrixBase::normalized() */ - inline Quat normalized() const { return Quat(coeffs().normalized()); } + inline Quaternion normalized() const { return Quaternion(coeffs().normalized()); } /** \returns the dot product of \c *this and \a other * Geometrically speaking, the dot product of two unit quaternions @@ -619,29 +157,38 @@ public: template QuaternionBase& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); - template inline Quat operator* (const QuaternionBase& q) const; + template inline Quaternion operator* (const QuaternionBase& q) const; template inline QuaternionBase& operator*= (const QuaternionBase& q); - Quat inverse(void) const; - Quat conjugate(void) const; + Quaternion inverse(void) const; + Quaternion conjugate(void) const; - template Quat slerp(Scalar t, const QuaternionBase& other) const; + template Quaternion slerp(Scalar t, const QuaternionBase& other) const; /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const QuaternionBase& other, typename RealScalar prec = precision()) const + bool isApprox(const QuaternionBase& other, RealScalar prec = precision()) const { return coeffs().isApprox(other.coeffs(), prec); } Vector3 _transformVector(Vector3 v) const; + /** \returns \c *this with scalar type casted to \a NewScalarType + * + * Note that if \a NewScalarType is equal to the current scalar type of \c *this + * then this function smartly returns a const reference to \c *this. + */ + template + inline typename ei_cast_return_type >::type cast() const + { + return typename ei_cast_return_type >::type( + coeffs().template cast()); + } }; -/* ########### Quat -> Quaternion */ - template -struct ei_traits > +struct ei_traits > { typedef _Scalar Scalar; typedef Matrix<_Scalar,4,1> Coefficients; @@ -651,18 +198,18 @@ struct ei_traits > }; template -class Quat : public QuaternionBase >{ - typedef QuaternionBase > Base; +class Quaternion : public QuaternionBase >{ + typedef QuaternionBase > Base; public: using Base::operator=; typedef _Scalar Scalar; - typedef typename ei_traits >::Coefficients Coefficients; + typedef typename ei_traits >::Coefficients Coefficients; typedef typename Base::AngleAxisType AngleAxisType; /** Default constructor leaving the quaternion uninitialized. */ - inline Quat() {} + inline Quaternion() {} /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from * its four coefficients \a w, \a x, \a y and \a z. @@ -671,38 +218,29 @@ public: * while internally the coefficients are stored in the following order: * [\c x, \c y, \c z, \c w] */ - inline Quat(Scalar w, Scalar x, Scalar y, Scalar z) + inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z) { coeffs() << x, y, z, w; } /** Constructs and initialize a quaternion from the array data * This constructor is also used to map an array */ - inline Quat(const Scalar* data) : m_coeffs(data) {} + inline Quaternion(const Scalar* data) : m_coeffs(data) {} /** Copy constructor */ -// template inline Quat(const QuaternionBase& other) { m_coeffs = other.coeffs(); } [XXX] redundant with 703 +// template inline Quaternion(const QuaternionBase& other) { m_coeffs = other.coeffs(); } [XXX] redundant with 703 /** Constructs and initializes a quaternion from the angle-axis \a aa */ - explicit inline Quat(const AngleAxisType& aa) { *this = aa; } + explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } /** Constructs and initializes a quaternion from either: * - a rotation matrix expression, * - a 4D vector expression representing quaternion coefficients. */ template - explicit inline Quat(const MatrixBase& other) { *this = other; } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename ei_cast_return_type >::type cast() const - { return typename ei_cast_return_type >::type(*this); } + explicit inline Quaternion(const MatrixBase& other) { *this = other; } /** Copy constructor with scalar type conversion */ template - inline explicit Quat(const QuaternionBase& other) + inline explicit Quaternion(const QuaternionBase& other) { m_coeffs = other.coeffs().template cast(); } inline Coefficients& coeffs() { return m_coeffs;} @@ -712,9 +250,9 @@ protected: Coefficients m_coeffs; }; -/* ########### Map */ +/* ########### Map */ -/** \class Map +/** \class Map * \nonstableyet * * \brief Expression of a quaternion @@ -724,8 +262,8 @@ protected: * \sa class Quaternion, class QuaternionBase */ template -struct ei_traits, _PacketAccess> >: -ei_traits > +struct ei_traits, _PacketAccess> >: +ei_traits > { typedef _Scalar Scalar; typedef Map > Coefficients; @@ -735,14 +273,14 @@ ei_traits > }; template -class Map, PacketAccess > : public QuaternionBase, PacketAccess> >, ei_no_assignment_operator { +class Map, PacketAccess > : public QuaternionBase, PacketAccess> >, ei_no_assignment_operator { public: typedef _Scalar Scalar; - typedef typename ei_traits, PacketAccess> >::Coefficients Coefficients; + typedef typename ei_traits, PacketAccess> >::Coefficients Coefficients; - inline Map, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {} + inline Map, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {} inline Coefficients& coeffs() { return m_coeffs;} inline const Coefficients& coeffs() const { return m_coeffs;} @@ -751,16 +289,16 @@ class Map, PacketAccess > : public QuaternionBase > QuaternionMapd; -typedef Map > QuaternionMapf; -typedef Map, Aligned> QuaternionMapAlignedd; -typedef Map, Aligned> QuaternionMapAlignedf; +typedef Map > QuaternionMapd; +typedef Map > QuaternionMapf; +typedef Map, Aligned> QuaternionMapAlignedd; +typedef Map, Aligned> QuaternionMapAlignedf; // Generic Quaternion * Quaternion product template struct ei_quat_product { - inline static Quat run(const QuaternionBase& a, const QuaternionBase& b){ - return Quat + inline static Quaternion run(const QuaternionBase& a, const QuaternionBase& b){ + return Quaternion ( a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), @@ -773,12 +311,12 @@ template template -inline Quat >::Scalar> QuaternionBase::operator* (const QuaternionBase& other) const +inline Quaternion >::Scalar> QuaternionBase::operator* (const QuaternionBase& other) const { EIGEN_STATIC_ASSERT((ei_is_same_type::ret), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return ei_quat_product::Scalar, + typename ei_traits::Scalar, ei_traits::PacketAccess && ei_traits::PacketAccess>::run(*this, other); } @@ -938,16 +476,16 @@ inline QuaternionBase& QuaternionBase::setFromTwoVectors(const * \sa Quaternion2::conjugate() */ template -inline Quat >::Scalar> QuaternionBase::inverse() const +inline Quaternion >::Scalar> QuaternionBase::inverse() const { // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? Scalar n2 = this->squaredNorm(); if (n2 > 0) - return Quat(conjugate().coeffs() / n2); + return Quaternion(conjugate().coeffs() / n2); else { // return an invalid result to flag the error - return Quat(ei_traits::Coefficients::Zero()); + return Quaternion(ei_traits::Coefficients::Zero()); } } @@ -958,9 +496,9 @@ inline Quat >::Scalar> QuaternionBase * \sa Quaternion2::inverse() */ template -inline Quat >::Scalar> QuaternionBase::conjugate() const +inline Quaternion >::Scalar> QuaternionBase::conjugate() const { - return Quat(this->w(),-this->x(),-this->y(),-this->z()); + return Quaternion(this->w(),-this->x(),-this->y(),-this->z()); } /** \returns the angle (in radian) between two rotations @@ -981,13 +519,13 @@ inline typename ei_traits >::Scalar QuaternionBase template -Quat >::Scalar> QuaternionBase::slerp(Scalar t, const QuaternionBase& other) const +Quaternion >::Scalar> QuaternionBase::slerp(Scalar t, const QuaternionBase& other) const { static const Scalar one = Scalar(1) - precision(); Scalar d = this->dot(other); Scalar absD = ei_abs(d); if (absD>=one) - return Quat(*this); + return Quaternion(*this); // theta is the angle between the 2 quaternions Scalar theta = std::acos(absD); @@ -998,7 +536,7 @@ Quat >::Scalar> QuaternionBase(scale0 * coeffs() + scale1 * other.coeffs()); + return Quaternion(scale0 * coeffs() + scale1 * other.coeffs()); } // set from a rotation matrix diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index c608e4843..1b8f6aead 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -26,31 +26,12 @@ #ifndef EIGEN_GEOMETRY_SSE_H #define EIGEN_GEOMETRY_SSE_H -template<> inline Quaternion -ei_quaternion_product(const Quaternion& _a, const Quaternion& _b) -{ - const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); - Quaternion res; - __m128 a = _a.coeffs().packet(0); - __m128 b = _b.coeffs().packet(0); - __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2), - ei_vec4f_swizzle1(b,2,0,1,2)),mask); - __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1), - ei_vec4f_swizzle1(b,0,1,2,1)),mask); - ei_pstore(&res.x(), - _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)), - _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0), - ei_vec4f_swizzle1(b,1,2,0,0))), - _mm_add_ps(flip1,flip2))); - return res; -} - template struct ei_quat_product { - inline static Quat run(const QuaternionBase& _a, const QuaternionBase& _b) + inline static Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); - Quat res; + Quaternion res; __m128 a = _a.coeffs().packet(0); __m128 b = _b.coeffs().packet(0); __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2),