Compare commits

..

252 Commits

Author SHA1 Message Date
Thomas Capricelli
cd6c258f44 Fix a previous mistake in moving the 2.0-beta5 in svn for some last-minute
fixes.
2009-05-16 17:23:42 +02:00
Thomas Capricelli
bd9c32e2f1 Remove this old file. It was stalling in history because of a bug in svn,
which did not prevent the commit (svn r722564) to 'svn copy' a directory
called 'Core/' on top of an existing file 'Core'

see http://websvn.kde.org/?view=rev&revision=722564
2009-05-16 17:20:44 +02:00
convert-repo
e0554752d8 update tags 2009-05-16 14:05:54 +00:00
Benoit Jacob
dd390470e1 simplification (no reason anymore to write that in that convoluted way) 2009-05-15 16:05:45 +00:00
Benoit Jacob
5ee9f1a705 argh, forgot to re-add the throw() 2009-05-15 15:54:52 +00:00
Benoit Jacob
126284d08b * fix bugs in EigenTesting.cmake: it didn't work with -DEIGEN_NO_ASSERTION_CHECKING=ON
* only try...catch if exceptions are enabled
2009-05-15 15:53:26 +00:00
Benoit Jacob
7c14e1eac4 add partial-pivoting LU decomposition
the name 'PartialLU' is not meant to be definitive!
make inverse() and determinant() use it, so it's *almost* considered
well tested.
2009-05-13 02:02:22 +00:00
Gael Guennebaud
877c3c00a2 enable testing of complex numbers for taucs 2009-05-12 13:43:40 +00:00
Gael Guennebaud
159c99a288 fix adolc unit test for dynamic sizes 2009-05-12 07:38:46 +00:00
Gael Guennebaud
f5b5571a5a compilation fixes 2009-05-12 07:32:34 +00:00
Gael Guennebaud
9b256d997e various minor updates of some unit tests 2009-05-11 11:09:41 +00:00
Gael Guennebaud
6a4e94f349 bugfix from Jens Mueller (s/RowMajor/IsRowMajor) 2009-05-11 10:59:27 +00:00
Benoit Jacob
9afd1324fd constant Diagonal ---> DiagonalBits
introduce ei_is_diagonal to check for it
DiagonalCoeffs ---> Diagonal and allow Index to by Dynamic
-> add MatrixBase::diagonal(int) with unittest and doc
2009-05-10 16:24:39 +00:00
Benoit Jacob
eac79b6d2e CREDIT Hauke Heibel, fix MSVC warnings 2009-05-09 03:41:17 +00:00
Benoit Jacob
3b79d99f71 *make coeff() return a const reference (required with the recent change with CoeffReturnType)
*fix a unused variable warning
2009-05-09 03:39:31 +00:00
Benoit Jacob
c500415e18 result of our experiments with LU tuning: implement very simple formula, that
turns out to be similar to Higham's formula already in use in LDLt
2009-05-07 20:35:26 +00:00
Gael Guennebaud
b6c76c30cd apply patch from Hauke Heibel cleaning overloaded operator new/detete 2009-05-07 20:33:48 +00:00
Benoit Jacob
47fc3858b9 oops, didn't want to commit that 2009-05-07 18:40:06 +00:00
Benoit Jacob
c8a22dbc08 CREDIT Hauke Heibel, more std::vector::insert fixes 2009-05-07 18:38:07 +00:00
Benoit Jacob
159ab4a043 CREDIT Hauke Heibel, windows compatibility fixes in MatrixExponential 2009-05-07 18:11:49 +00:00
Gael Guennebaud
7080282a6d forgot a svn add CMakeLists.txt 2009-05-07 14:49:56 +00:00
Gael Guennebaud
6dffdca123 fix realloc when initial size was 0 (bug reported by Jens Mueller) 2009-05-07 13:13:42 +00:00
Benoit Jacob
4f0af00e51 *add missing overloads of setZero, etc... that were mentioned in the tutorial
--->they go into Matrix as they resize.
*add isConstant() alias to isApproxToConstant()
*extend unit-test
*change an assert into a static assert
2009-05-06 21:40:24 +00:00
Benoit Jacob
0c0d38272e add copyright on two public headers that are not so trivial... 2009-05-06 15:48:28 +00:00
Benoit Jacob
e1abffa513 add missing function, thanks to Hauke Heibel 2009-05-06 13:33:35 +00:00
Gael Guennebaud
3361ad4589 one more gcc 3.3 fix 2009-05-06 10:51:46 +00:00
Gael Guennebaud
db079f3a8c fix internal error with gcc 3.3 (all tests are now ok with 3.3 !) 2009-05-06 09:37:30 +00:00
Gael Guennebaud
1e286464ab * compilation fixes for gcc 3.3
* test Part::swap
2009-05-06 08:43:38 +00:00
Gael Guennebaud
23f073625d add -Wextra only if compiler supports it 2009-05-06 08:15:54 +00:00
Benoit Jacob
834eb5bfc8 new unsupported module by Jitse Niesen: matrix exponential 2009-05-05 20:46:55 +00:00
Benoit Jacob
66f059b99d remove unused member remean that was just initialized 2009-05-05 17:29:44 +00:00
Benoit Jacob
46e1162a39 reimplement linearRegression on top of fitHyperplane which is much better 2009-05-05 17:15:35 +00:00
Benoit Jacob
2b2f0c0220 fix linearRegression, fix doc, add unit test (it was untested since the change
making fitHyperplane no longer use it)
2009-05-05 16:50:58 +00:00
Gael Guennebaud
5b364a68cb oops 2009-05-04 14:53:21 +00:00
Gael Guennebaud
2829314284 new simplified API to fill sparse matrices (the old functions are
deprecated). Basically there are now only 2 functions to set a
coefficient:
1) mat.coeffRef(row,col) = value;
2) mat.insert(row,col) = value;
coeffRef has no limitation, insert assumes the coeff has not already
been set, and raises an assert otherwise.
In addition I added a much lower level, but more efficient filling
mechanism for
internal use only.
2009-05-04 14:25:12 +00:00
Thomas Capricelli
ddb6e96d48 fix warnings with recent gcc 2009-05-04 13:55:21 +00:00
Benoit Jacob
dec09a618d fix warning, unused variable dummy 2009-05-04 13:01:23 +00:00
Benoit Jacob
b60571a193 fix warnings with unused static functions 2009-05-04 12:49:56 +00:00
Benoit Jacob
8aa8854bbf fix SSE2 detection on win64, reported by 'kajala' 2009-05-04 12:13:37 +00:00
Gael Guennebaud
c25fc50d54 add an AlignedVector3 module suitable for vectorization of 3D vectors 2009-05-03 17:19:19 +00:00
Gael Guennebaud
469b8aa380 "forgot to commit the required changes in stdvector unit test" 2009-05-03 15:39:37 +00:00
Benoit Jacob
95bda5e6ab let the user disable alignment altogether by #defining EIGEN_DONT_ALIGN.
Until now, the user had to edit the source code to do that.
Internally, add EIGEN_ALIGN that takes into account both EIGEN_DONT_ALIGN.and
EIGEN_ARCH_WANTS_ALIGNMENT. From now on, only EIGEN_ALIGN should be used to
test whether we want to align.
2009-05-03 13:50:56 +00:00
Gael Guennebaud
facee57b8d add auto transposition for vectors 2009-04-30 12:09:45 +00:00
Gael Guennebaud
7029ed6b88 ok, this time cast should really work ; sorry for the noise 2009-04-29 17:39:39 +00:00
Gael Guennebaud
428a12902a fixed my mistake in cast 2009-04-29 17:05:19 +00:00
Benoit Jacob
746079f75d gni, forgot to call the new subtest 2009-04-29 17:01:10 +00:00
Gael Guennebaud
9e9e99a42e casting to the same type no longer generates a CwiseUnaryOp 2009-04-29 14:57:18 +00:00
Benoit Jacob
8b1e7c2792 add cast<>() tests. including a vectorization_logic test that currently fails (casting to same type should not prevent vectorization) 2009-04-29 14:51:19 +00:00
Gael Guennebaud
e722f55382 fix the Matrix(int,int)/vector 2D ctors issue so that we really
have a Matrix(Scalar,Scalar) ctor. (useful for std::complex, user
defined types, etc.
2009-04-24 14:38:40 +00:00
Benoit Jacob
32e9801890 CREDIT Ross Smith: fix posix_memalign detection 2009-04-24 13:26:36 +00:00
Gael Guennebaud
e201091d89 remove unsupported/doc from make's "all" target 2009-04-23 12:22:10 +00:00
Gael Guennebaud
acb32c69d4 * update BVH to explicitely use aligned_allocator
* fix warning in StdVector
2009-04-23 11:33:36 +00:00
Gael Guennebaud
c7bb7436f9 make the ei_p* math functions overloads instead of template
specializations
2009-04-22 21:35:50 +00:00
Gael Guennebaud
e0904a70ce update STL vector doc 2009-04-22 21:29:42 +00:00
Gael Guennebaud
6f6b5ad016 add a generic version of std::vector::resize for other stl
implementations
2009-04-21 21:40:33 +00:00
Gael Guennebaud
2697877fac std::vector now explicitely requires the use of Eigen::aligned_allocator
CREDIT Hauke Heibel
2009-04-21 21:30:38 +00:00
Gael Guennebaud
c5c384cf06 add missing vector ctor reported by Markus Moll on the ML 2009-04-21 11:52:25 +00:00
Gael Guennebaud
5e5bac52d7 this should fix the linking issue with StdVector without any user
changes... I cross my fingers...
2009-04-21 11:44:58 +00:00
Gael Guennebaud
4efcc14b91 fix ctor issues with ei_workaround_msvc_std_vector 2009-04-21 08:12:07 +00:00
Christian Ehrlicher
b14d1139df not compilable with msvc :( 2009-04-15 17:13:23 +00:00
Gael Guennebaud
c744960984 patch from Ilya Baran: This small patch fixes a potential initialization
bug in BVAlgorithms and slightly corrects the BVH doc.
2009-04-15 05:54:07 +00:00
Benoit Jacob
6ee4eb94fb fix the 4x4 inverse -- unit test passes again 2009-04-14 13:42:23 +00:00
Benoit Jacob
2bb1c9e8dc finally commit Rohit's work as the start of a new (currently
unsupported) module, MoreVectorization.

CCMAIL:rpg.314@gmail.com
2009-04-14 13:15:53 +00:00
Gael Guennebaud
804a239d30 patch from Moritz Lenz to allow solving transposed problem with superlu 2009-04-10 19:54:43 +00:00
Gael Guennebaud
fb3078fb62 fix memory leak in superlu backend 2009-04-10 18:49:38 +00:00
Gael Guennebaud
7254201632 bugfix when the diagonal is not stored and assumed to be 1 2009-04-10 18:13:37 +00:00
Gael Guennebaud
22edf77470 add a 4x4 inverse case which is not handled by the current
ei_compute_inverse_in_size4_case (reported by mikola on IRC)
2009-04-09 21:55:29 +00:00
Gael Guennebaud
ea41a18b80 clean asm comments 2009-04-09 21:29:31 +00:00
Gael Guennebaud
d78eb02627 add aligned_allocator operator == and != as suggested by Hauke Heibel 2009-04-09 21:22:02 +00:00
Benoit Jacob
0c99de5a17 more patches from Hauke Heibel: compilation/warning fixes from VC++ 2009-04-09 17:19:17 +00:00
Benoit Jacob
e6332cba4b forward-port r951449: patch by Hauke Heibel: compile fix with VS 9 2009-04-09 12:06:13 +00:00
Gael Guennebaud
e8329f9f45 relicence Julien Pommier's SSE code to Eigen's licenses 2009-04-09 06:03:51 +00:00
Gael Guennebaud
aabca5a44f stupid typo 2009-04-06 13:35:48 +00:00
Benoit Jacob
502bf4a81d * fix the binary bloat issue, Rohit's idea was the good one
* a few dox fixes (alloc routines do return 0 on error) and forgot to update version number in CMakeLists
2009-04-06 13:33:42 +00:00
Gael Guennebaud
38f501a596 fix computation of aligned_bit (has been broken by the change of
AutoAlign/DontAlign)
2009-04-05 17:34:59 +00:00
Gael Guennebaud
5b1d0cebc5 sparse module: new API proposal for triangular solves and experimental
solver support with a sparse matrix as the rhs.
2009-04-05 16:30:10 +00:00
Jos van den Oever
5a628567b0 Compile fix. 2009-04-05 14:26:47 +00:00
Kenneth Frank Riddile
9a8096dd1d * fixed truncation warnings caused by MatrixBase::CoeffReturnType under MSVC 2009-04-03 20:19:07 +00:00
Gael Guennebaud
ff3a3209ca add an assertion in sparse LLT for invalid input matrix 2009-04-03 08:30:15 +00:00
Gael Guennebaud
adf5104bae oops, bad copy paste in ei_psqrt, thanks to Jitse Niesen 2009-04-02 06:29:05 +00:00
Benoit Jacob
40432c84b9 CwiseUnaryOp -> SparseCwiseUnaryOp 2009-04-01 14:47:56 +00:00
Gael Guennebaud
0170eb0dbe add an auto-diff module in unsupported. it is similar to adolc's forward
mode but the advantage of using Eigen's expression template to compute
the derivatives (unless you nest an AutoDiffScalar into an Eigen's
matrix).
2009-04-01 14:43:37 +00:00
Benoit Jacob
0f8e692b3f * Find SuperLU also when it is installed without a superlu/ prefix
* Some more CoeffReturnType changes
2009-04-01 14:07:38 +00:00
Benoit Jacob
113fc3a260 now if Derived has the DirectAccess flag, then MatrixBase<Derived>::coeff() const returns a const Scalar& and not a Scalar as before.
useful for people doing direct access. + 1 bugfix thanks to Patrick Mihelich, forgot a & in MapBase::coeff(int).
2009-04-01 00:39:56 +00:00
Benoit Jacob
2f45eeb0c6 More Cholesky fixes.
* Cholesky decs are NOT rank revealing so remove all the rank/isPositiveDefinite etc stuff.
* fix bug in LLT: s/return/continue/
* introduce machine_epsilon constants, they are actually needed for Higman's formula determining
  the cutoff in Cholesky. Btw fix the page reference to his book (chat with Keir).
* solve methods always return true, since this isn't a rank revealing dec. Actually... they already did always return true!! Now it's explicit.
* updated dox and unit-test
2009-04-01 00:21:16 +00:00
Benoit Jacob
8e2b191acf fix typo found by noir.sender from KDE forums 2009-03-31 17:00:30 +00:00
Benoit Jacob
1e6097a810 fix mistake in static assertion, patch by Markus Moll. 2009-03-31 16:07:12 +00:00
Benoit Jacob
bf596d0b3a add adjointInPlace() and add documentation warnings on adjoint() and transpose() about aliasing effects. 2009-03-31 13:55:40 +00:00
Benoit Jacob
a1ba995f05 Many improvements in LLT and LDLT:
* in LDLT, support the negative semidefinite case
* fix bad floating-point comparisons, improves greatly the accuracy of methods like
  isPositiveDefinite() and rank()
* simplifications
* identify (but not resolve) bug: claim that only triangular part is used, is inaccurate
* expanded unit-tests
2009-03-30 21:45:45 +00:00
Gael Guennebaud
df9dfa1455 fix superLU backend: missing operator= 2009-03-27 19:25:22 +00:00
Gael Guennebaud
e9f6167485 make special ei_p functions static to avoid linking issues (they are too
complex to be inlined)
2009-03-27 14:55:46 +00:00
Gael Guennebaud
49fc1e3e84 add vectorization of sqrt for float 2009-03-27 14:41:46 +00:00
Gael Guennebaud
3499f6eccd fix Taucs support (it appears Taucs does not return sorted matrices) 2009-03-26 17:11:43 +00:00
Benoit Jacob
1b7b538e05 The ABI break:
* set AutoAlign=0, DontAlign!=0
* set Dynamic=33331
* add check on fixed sizes
* bump version to 2.0.52
2009-03-26 16:30:54 +00:00
Gael Guennebaud
ce5669dbf9 * enable vectorization of sin, cos, etc. by default with an option to
disable them (-DEIGEN_FAST_MATH=0)
* add a specialization of MatrixBase::operator*(RealScalar) for fast
  "matrix of complex" times scalar products (even more useful for
  autodiff scalar types)
2009-03-26 12:50:24 +00:00
Gael Guennebaud
62de40f8bb oops forgot to include a file in previous commit (I had other local
changes I did not want to commit yet...)
2009-03-26 07:10:59 +00:00
Gael Guennebaud
a22ef7e1f3 for some reason passing the argument by const reference killed the perf
(in the packet version of sin, cos, exp, lop), so let's pass them by
value. Also, improve the perf of ei_plog by reducing dependencies.
2009-03-25 18:33:36 +00:00
Gael Guennebaud
17860e578c add SSE2 versions of sin, cos, log, exp using code from Julien
Pommier. They are for float only, and they return exactly the same
result as the standard versions in about 90% of the cases. Otherwise the max error
is below 1e-7. However, for very large values (>1e3) the accuracy of sin and cos
slighlty decrease. They are about 3 or 4 times faster than 4 calls to their respective
standard versions. So, is it ok to enable them by default in their respective functors ?
2009-03-25 12:26:13 +00:00
Andrew Coles
64fbd93cd9 As values may be used uninitialised, they have now been given
sensible defaults; or, in other words, if worse comes to worst,
we'll get a guaranteed segfault rather than a heisenburg.
2009-03-23 21:15:33 +00:00
Gael Guennebaud
70c0174bf9 * allows fixed size matrix with size==0 (via a specialization of
MatrixStorage returning a null pointer). For instance this is very
  useful to make Tridiagonalization compile for 1x1 matrices
* fix LLT and eigensolver for 1x1 matrix
2009-03-23 14:44:44 +00:00
Gael Guennebaud
f4cf5e9b26 split and extend eigen-solver tests 2009-03-23 14:38:59 +00:00
Konstantinos A. Margaritis
fe00e864a1 ei_pnegate implemented for AltiVec 2009-03-20 17:26:50 +00:00
Gael Guennebaud
fbf415c547 add vectorization of unary operator-() (the AltiVec version is probably
broken)
2009-03-20 10:03:24 +00:00
Gael Guennebaud
4bb5221d22 Add BVH module in unsupported (patch from Ilya Baran)
(I thought I committed it a week ago but it seems the command failed)
2009-03-18 20:06:06 +00:00
Gael Guennebaud
3d385ae968 angle-axis doc: make it more clear the axis must be normalized 2009-03-18 19:38:55 +00:00
Gael Guennebaud
dcf49e5a28 more MSVC fixes: restrict keywords (sorry for all these commits) 2009-03-17 13:32:26 +00:00
Gael Guennebaud
718af05517 more MSVC fixes (asm comments...) 2009-03-17 13:25:26 +00:00
Gael Guennebaud
497d77b08c fix compilation MSVC 2009-03-17 13:18:53 +00:00
Gael Guennebaud
312994fa98 opengl demo: add aligned operator new where appropriate and remove my
mess...
2009-03-13 13:17:19 +00:00
Gael Guennebaud
c6264d9b7e add affine * homogeneous vector for backward compatibility (e.g.,
kgllib)
2009-03-11 14:32:03 +00:00
Gael Guennebaud
02e97ac0ae minor changes in AlignedBox needed for BVH module 2009-03-11 14:21:55 +00:00
Gael Guennebaud
b8f46090ff add optimized cross3 function (code from Rohit Garg) 2009-03-11 14:20:36 +00:00
Gael Guennebaud
3298320007 fix doxygen generation of unsupported modules 2009-03-11 13:11:38 +00:00
Gael Guennebaud
f697ea6d30 fix a few compilation errors and warnings (ICC) 2009-03-11 08:45:53 +00:00
Gael Guennebaud
14691d6836 fix compilation with old, and future gcc 2009-03-10 11:55:50 +00:00
Gael Guennebaud
3e4307d8a8 compilation fix in MapBase 2009-03-10 09:06:38 +00:00
Gael Guennebaud
caa1ef7515 various BTL updates (disable Cholesky for MTL, add new plot settings,
etc)
2009-03-09 23:28:46 +00:00
Gael Guennebaud
8aa5aa269a add "slice vectorization" of redux (eg. m.block().minCoeff() is now
vectorized)
2009-03-09 23:16:39 +00:00
Gael Guennebaud
c087373968 because of a missing specialization, operator/(scalar) was not vectorized 2009-03-09 23:14:53 +00:00
Gael Guennebaud
db6c3d0197 fix MapBase's ForceAligned concept which was not working at all.... 2009-03-09 19:23:31 +00:00
Gael Guennebaud
3f80c68be5 add the vectorization of abs 2009-03-09 18:40:09 +00:00
Gael Guennebaud
bd8107c90c forgot to include a file in previous commit 2009-03-09 14:18:29 +00:00
Gael Guennebaud
8a424efb11 add an option to bench eigen without GCC's auto vec (might conflict with
Eigen's auto vec)
2009-03-09 14:16:05 +00:00
Gael Guennebaud
314aff875e fix warning in Tridiag 2009-03-09 10:41:37 +00:00
Gael Guennebaud
44f218988c add a small note in Transform doc. 2009-03-08 11:38:45 +00:00
Gael Guennebaud
3ac42fed94 big rework of the Transform class:
* add Projective and AffineCompact modes as an optional third template
  argument
* extend Transform::operator* to support more use cases
2009-03-08 11:35:30 +00:00
Gael Guennebaud
7718a8ed83 slight optimization of SSE base integer mul (thanks to Rohit Garg) 2009-03-08 10:14:07 +00:00
Jure Repinc
f9790a649c Install the newly added eigen2/Eigen/src/Geometry/arch/Geometry_SSE.h 2009-03-08 06:21:21 +00:00
Gael Guennebaud
e4f64ce098 add optimized quaternion * quaternion product specialization for
float/SSE using code from Rohit Garg
2009-03-07 13:52:44 +00:00
Gael Guennebaud
6f95270ede significantly reduce the default stack allocation limit which was much
too high
2009-03-06 16:52:43 +00:00
Gael Guennebaud
2c78ca62a6 patch from Stjepan Rajko (MSVC fix in RotationBase) 2009-03-06 11:45:39 +00:00
Andrew Coles
093e3cd5b5 Commented out duplicate definition of TransformTraits - was causing
compile-time errors.
2009-03-05 22:35:06 +00:00
Gael Guennebaud
fa9f7708d4 add efficient matrix product specializations for Homogeneous 2009-03-05 16:40:56 +00:00
Gael Guennebaud
31332fca0b remove bad #include of SelfadjointRank2Update.h 2009-03-05 10:29:20 +00:00
Gael Guennebaud
0be89a4796 big addons:
* add Homogeneous expression for vector and set of vectors (aka matrix)
  => the next step will be to overload operator*
* add homogeneous normalization (again for vector and set of vectors)
* add a Replicate expression (with uni-directional replication
  facilities)
=> for all of them I'll add examples once we agree on the API
* fix gcc-4.4 warnings
* rename reverse.cpp array_reverse.cpp
2009-03-05 10:25:22 +00:00
Gael Guennebaud
d710ccd41e BTL: add syr2 action 2009-03-05 08:15:32 +00:00
Gael Guennebaud
a72ff5abc1 BTL: - patch from Victor (add ACML support)
- fix overflow issues
2009-03-05 08:11:47 +00:00
Gael Guennebaud
6a26506341 add ReturnByValue pseudo expression for in-place evaluation with a
return-by-value API style (will soon use it for the transform products)
2009-03-04 13:00:00 +00:00
Gael Guennebaud
45136ac3b6 various update of of BTL 2009-03-04 07:21:17 +00:00
Gael Guennebaud
3288e9e168 add much faster versions of unaligned stores (and slightly faster
unaligned loads)
2009-03-03 14:01:30 +00:00
Gael Guennebaud
c4601314be cleaned Tridiagonalization impl, with improved performance 2009-03-02 15:07:39 +00:00
Gael Guennebaud
ed134a0ce5 performance improvement: rewrite of the matrix-matrix product following
Goto's paper => x1.4 speedup with more consistent perf results
2009-03-02 13:15:15 +00:00
Gael Guennebaud
8ed186b9ab improve WIP new matrix product 2009-02-27 17:18:52 +00:00
Gael Guennebaud
40774c625e add a proof of concept autodiff jacobian helper class based on adolc
with unit test and FindAdolc cmake module
2009-02-27 16:19:13 +00:00
Gael Guennebaud
170128770a compilation fix + test orho methods for complex 2009-02-26 10:09:23 +00:00
Gael Guennebaud
91af389a9a fix unitOrthogonal() for size > 3 2009-02-25 09:23:09 +00:00
Laurent Montel
2d6d14a3d3 Add COMPONENT Devel 2009-02-23 07:50:56 +00:00
Laurent Montel
8e7c4df0db Fix install header 2009-02-22 10:28:31 +00:00
Gael Guennebaud
de014efdaf * split CacheFriendlyProduct into multiple smaller files
* add an efficient selfadjoint * vector implementation (= blas symv)
  perf are inbetween MKL and GOTO
  => the interface is still missing (have to be rethougth)
2009-02-21 20:20:38 +00:00
Gael Guennebaud
3d86dcf473 oops, got confused by the preprocessor directives around
posix_memalign...
2009-02-21 16:35:57 +00:00
Gael Guennebaud
7c4f9ecf0c fix posix_memalign return value warning 2009-02-21 16:23:18 +00:00
Benoit Jacob
177500f37e fix typo found by markusfroeb (forum) 2009-02-21 14:30:49 +00:00
Daniel Gomez Ferro
032880074e Added new product implementation.
Just works for square, power of 2 matrices of floats.
2009-02-20 22:23:25 +00:00
Gael Guennebaud
7485aa6d57 add symv bench 2009-02-20 21:05:19 +00:00
Keir Mierle
22b9de1849 Fix some formatting in quick ref. Add references to headers. 2009-02-19 16:46:35 +00:00
Gael Guennebaud
e2ee7a6a58 increase version number for step 2009-02-19 15:46:10 +00:00
Gael Guennebaud
752d95c20e eventually c++ does not provide any optimized pow(int,int) function,
so here you go :) (should also fix Timothy's troubles)
2009-02-18 18:24:31 +00:00
Gael Guennebaud
747d83a05a add size, rows, cols, (), (,) functions in ASCII ref 2009-02-18 16:35:16 +00:00
Gael Guennebaud
eb2cc8f502 patch from Jitse Niesen: fix ascii quick ref matlab operator ' 2009-02-18 16:24:15 +00:00
Gael Guennebaud
8f83b37b2a fix install of IterativeSolvers module 2009-02-18 13:11:29 +00:00
Gael Guennebaud
23f543ee5e add the ASCII quick reference made by Kier 2009-02-18 10:27:18 +00:00
Gael Guennebaud
21161d8bcf fix typo in FindTaucs.cmake 2009-02-17 10:34:17 +00:00
Gael Guennebaud
ee04c5c874 add tests showing bug in unitOrthogonal such that we don't forget it! 2009-02-17 09:57:32 +00:00
Gael Guennebaud
e6f1104b57 * fix Quaternion::setFromTwoVectors (thanks to "benv" from the forum)
* extend PartialRedux::cross() to any matrix sizes with automatic
  vectorization when possible
* unit tests: add "geo_" prefix to all unit tests related to the
  geometry module and start splitting the big "geometry.cpp" tests to
  multiple smaller ones (also include new tests)
2009-02-17 09:53:05 +00:00
Gael Guennebaud
67b4fab4e3 fix assertion issue in slice vectorization 2009-02-16 10:17:21 +00:00
Gael Guennebaud
c5245a3388 update vectorization_logic unit test wrt previous sum/redux change 2009-02-13 08:45:19 +00:00
Konstantinos A. Margaritis
349557db9a no reason for 3 vec_mins, 2 are enough apparently in ei_predux_min 2009-02-12 22:03:30 +00:00
Konstantinos A. Margaritis
ad2bf14dbb modified ei_predux_min/max to actually use altivec instructions 2009-02-12 21:58:44 +00:00
Gael Guennebaud
3b12f48aa3 compilation fix for SuperLU 3.1 2009-02-12 17:50:51 +00:00
Gael Guennebaud
20a8bb96eb fix m = m*m with m sparse (gug found by Frederik Heinz) 2009-02-12 15:57:13 +00:00
Gael Guennebaud
59a1ed0932 fix bug in MapBase found by myguel 2009-02-12 15:29:20 +00:00
Gael Guennebaud
51c991af45 * exit Sum.h, exit Prod.h, welcome vectorization of redux() !
* add vectorization for minCoeff and maxCoeff
2009-02-12 15:18:59 +00:00
Gael Guennebaud
dc97d483fd update of the Array module doc 2009-02-12 07:55:35 +00:00
Gael Guennebaud
deb6254702 some ICC fixes 2009-02-12 07:55:03 +00:00
Gael Guennebaud
7954f7709a add ei_predux_mul for AltiVec 2009-02-10 18:26:59 +00:00
Gael Guennebaud
cbbc6d940b * add ei_predux_mul internal function
* apply Ricard Marxer's prod() patch with fixes for the vectorized path
2009-02-10 18:06:05 +00:00
Gael Guennebaud
a0cc5fba0a fix ICC internal compilation error 2009-02-10 14:15:32 +00:00
Gael Guennebaud
40ad661183 added an experimental IterativeSolvers module (currently in unsupported)
with a constrained conjugate gradient algorithm adapted from GMM++/ITL.
This algorithm is needed for Step.
2009-02-10 10:02:41 +00:00
Gael Guennebaud
e75bef9523 various minor fixes in Sparse module 2009-02-10 10:00:00 +00:00
Gael Guennebaud
169696a078 fix doxygen \ingroup for the array module 2009-02-09 10:13:06 +00:00
Gael Guennebaud
9c14d20656 add select snippet 2009-02-09 10:01:42 +00:00
Gael Guennebaud
a9688f0b71 - add diagonal * sparse product as an expression
- split sparse_basic unit test
- various fixes in sparse module
2009-02-09 09:59:30 +00:00
Gael Guennebaud
e0be020622 add DiagonalMatrix setZero and resize functions 2009-02-09 09:55:54 +00:00
Gael Guennebaud
666ade0c93 add "remap" snippet using placement new 2009-02-09 09:54:48 +00:00
Thomas Capricelli
5b4c3b21f3 documentation about inheriting from Eigen::Matrix 2009-02-08 22:36:28 +00:00
Konstantinos A. Margaritis
15e40b1099 fixed preserve_mask definition for AltiVec (needed __vector keyword) 2009-02-08 18:43:57 +00:00
Konstantinos A. Margaritis
505bdbb9ef should be __powerpc__ instead of __ppc__ 2009-02-08 18:22:34 +00:00
Gael Guennebaud
d8b7283a98 remove remaining debug stuff in Reverse.h 2009-02-08 12:56:47 +00:00
Vincenzo Di Massa
a385de080d fix build 2009-02-07 16:14:46 +00:00
Gael Guennebaud
6c4b6c2da8 forgot to commit the deletion of StdVector directory 2009-02-07 15:47:13 +00:00
Gael Guennebaud
17fd619430 more fixes in StdVector, sorry for the noise 2009-02-07 12:51:58 +00:00
Gael Guennebaud
365ec0744c disable vector::resize() workaround for gcc < 4.1 (they already use a const
reference)
2009-02-07 12:46:04 +00:00
Gael Guennebaud
671398372b arf... s/_MSVC_VER/_MSC_VER 2009-02-07 11:49:33 +00:00
Gael Guennebaud
4dd2efa113 little fix in new StdVector 2009-02-07 11:30:19 +00:00
Gael Guennebaud
3009d79a1f * allow Matrix to be resized to 0 (solve a lot of troubles with
some containers)
* new workaround for std::vector which is supposed to work for any
  classes having EIGEN_MAKE_ALIGNED_OPERATOR_NEW as discussed on ML
2009-02-07 11:16:15 +00:00
Gael Guennebaud
24808dc090 force the use of debug version of QtCore unless it is not available 2009-02-06 21:50:18 +00:00
Gael Guennebaud
1c24f5bbc5 eventually MSVC does not like my /O2 flags (incompatibility with other option set by default) 2009-02-06 15:29:40 +00:00
Gael Guennebaud
19b035ee11 s/cholesky/llt in precompiled lib and BTL 2009-02-06 14:01:01 +00:00
Gael Guennebaud
cc90495e30 add bench_reverse, draft of a reverse vectorization for AltiVec, make
global Scaling function static
2009-02-06 13:28:55 +00:00
Gael Guennebaud
f5d96df800 Add vectorization of Reverse (was more tricky than I thought) and
simplify the index based functions
2009-02-06 12:40:38 +00:00
Gael Guennebaud
4dc4ab3abb Reverse::coeff*(int) functions are for vector only 2009-02-06 09:13:04 +00:00
Gael Guennebaud
6fbca94803 apply Ricard patch for Reverse with minor modifications 2009-02-06 09:01:50 +00:00
Gael Guennebaud
dc97079905 add optimization flag for MSVC and heavy tests
remove unsupported namespace
2009-02-05 22:13:50 +00:00
Gael Guennebaud
a4487ef198 add snippet for sub/super diagonal
fix a few doc issues
2009-02-05 21:19:40 +00:00
Gael Guennebaud
e80f0b6c4e update doc of DiagonalCoeffs 2009-02-05 18:39:23 +00:00
Gael Guennebaud
910b387438 Add sub/super-diagonal expression (read/write) as a trivial extension of
DiagonalCoeffs. The current API is simply:
  m.diagonal<1>() => 1st super diagonal
  m.diagonal<-2>() => the 2nd sub diagonal
I'll add a code snippet once we agree on this API.
2009-02-05 18:37:21 +00:00
Gael Guennebaud
9637af5ecf undo an unecessary change in cache-friendly product made for MSVC 2009-02-05 11:37:57 +00:00
Gael Guennebaud
7c374394ee remove explicit fortran dependency in FindCholmod 2009-02-05 10:34:41 +00:00
Gael Guennebaud
8a6f38c638 forgot to commit changes in main CMakeLists.txt
(add unsupported folder)
2009-02-05 09:37:53 +00:00
Gael Guennebaud
da45184635 add custom FindBLAS FindLAPACK working for c++ compiler
fix issues in Cholmod/Taucs supports
2009-02-05 09:36:52 +00:00
Gael Guennebaud
1119f846cf fix various Taucs and Cholmod issues (they have not been tested for a while) 2009-02-04 22:14:09 +00:00
Gael Guennebaud
41f80b26cb bugfix in LDLt for size==1 2009-02-04 20:20:34 +00:00
Gael Guennebaud
5e6707d7f7 forgot to add EigenTesting.cmake file 2009-02-04 17:11:04 +00:00
Benoit Jacob
5d69c5102b oops, #ifdef instead of #if ---> bug 2009-02-04 16:57:28 +00:00
Benoit Jacob
f81479d392 forgot to update this unit test... 2009-02-04 16:55:38 +00:00
Benoit Jacob
93a089adc8 disable alignment altogether outside of the platforms which potentially have SSE or AltiVec
This should remove most portability issues to other platforms where data alignment issues (including
overloading operator new and new[]) can be tricky, and where data alignment is not needed in the first place.
2009-02-04 16:53:03 +00:00
Gael Guennebaud
c26dc9ab1b nothing interesting to see 2009-02-04 16:22:56 +00:00
Gael Guennebaud
82e0cbbac5 extend unsupported/README.txt file 2009-02-04 15:44:59 +00:00
Gael Guennebaud
95db32fcdc setup the unsupported directory structure.
The unsupported module documentation is automatically generated in:
  build/doc/unsupported/
with bidirectional cross references.
I leave a class Foo in AdolcForward module to illustrate the
cross-reference behavior. I will remove it in the next commit.
2009-02-04 15:37:00 +00:00
Gael Guennebaud
44a527dfa5 * classify and sort the doxygen "related pages"
by tweaking the filename and adding 2 categories:
   Troubleshooting and Advanced
* use the EXCLUDE_SYMBOLS to clean the class list
  (insteaded of a homemade bash script)
* remove the broken "exemple list"
* re-structure the unsupported directory as mentionned in the ML and
  integrate the doc as follow:
  - snippets of the unsupported directory are directly imported from the
    main snippets/CMakefile.txt (no need to duplicate code)
  - add a top level "Unsupported modules" group
  - unsupported modules have to defined their own sub group and nest it
    using \ingroup Unsupported_modules
  - then a pair of //@{ //@} will put everything in the submodule
  - this is just a proposal !
2009-02-04 09:44:44 +00:00
Gael Guennebaud
b0dd22cc72 update cholesky benchmark 2009-02-03 19:05:10 +00:00
Keir Mierle
b9a82be727 Add full pivoting to LDLT decomposition. 2009-02-03 17:50:35 +00:00
Gael Guennebaud
6c5868cc99 add a short unsupported/readme file 2009-02-03 09:02:55 +00:00
Gael Guennebaud
42fa03948c add unsupported/ directory with a first contribution from myself:
a header file providing support for adolc's adouble type in forward
mode. (adolc is an automatic differentiation library)
2009-02-03 08:34:09 +00:00
Keir Mierle
b4777379d4 Add Matrix::resizeLike(other) convenience function and test. 2009-02-03 01:43:59 +00:00
Benoit Jacob
bc9a276b78 call it "Eigen 2.0.50-unstable" to make things clear, and update EIGEN_MINOR_VERSION to 50 2009-02-02 17:15:01 +00:00
Benoit Jacob
61e45ed500 * label Cholesky and solveTriangular.* as experimental
* improve Experimental.dox
* update urls from /api/ to /dox/
2009-02-02 14:21:35 +00:00
Benoit Jacob
f3d5ba0c1f the BSD's don't have aligned malloc after all 2009-02-02 13:22:19 +00:00
Gael Guennebaud
42c4bc0ecf fix tutorial abs/abs2 (thanks to Keir) 2009-02-01 22:38:51 +00:00
Gael Guennebaud
4111316104 forgot to commit QR_solve snippet 2009-02-01 20:47:19 +00:00
Benoit Jacob
37cceeeaca add missing inline keywords 2009-01-30 23:08:47 +00:00
Gael Guennebaud
82e70fbcae fix duplicated geometry module in the doc 2009-01-29 23:10:16 +00:00
Gael Guennebaud
13d0a310fd fix MSVC internal compilation error 2009-01-29 22:49:24 +00:00
Gael Guennebaud
8e0ec3c62b reduce epsilon in QR 2009-01-29 16:11:46 +00:00
Gael Guennebaud
cf0857c44d fix MSVC stupid warnings 2009-01-29 09:45:25 +00:00
Gael Guennebaud
1752a3a677 more MSVC fixes, and more code factorization in Geometry module 2009-01-29 09:36:48 +00:00
Gael Guennebaud
36c8a64923 add MatrixBase::stableNorm() avoiding over/under-flow
using it in QR reduced the error of Keir test from 1e-12 to 1e-24 but
that's much more expensive !
2009-01-28 22:11:56 +00:00
Gael Guennebaud
42b237b83a * make sum and redux honor EvalBeforeNestingBit too
* fix MSVC issues (hopefully)
2009-01-28 21:09:08 +00:00
Gael Guennebaud
cc6159743d make dot() honor EvalBeforeNestingBit 2009-01-28 20:53:27 +00:00
Gael Guennebaud
dde729379a various updates in the (still messy) sparse benchmarks 2009-01-28 20:32:28 +00:00
Gael Guennebaud
9b594ab0fb fix overflow in sparse product 2009-01-28 18:21:38 +00:00
Benoit Jacob
9bc44094c5 add EIGEN_NO_AUTOMATIC_RESIZING
if defined, already initialized matrices won't be automatically resized in assignments
uninitialized matrices may still be initialized
2009-01-28 16:44:03 +00:00
Gael Guennebaud
1b194193ef Big change in DiagonalMatrix and Geometry/Scaling:
* previous DiagonalMatrix expression is now DiagonalMatrixWrapper
* DiagonalMatrix class is now for storage
* add the DiagonalMatrixBase class to factorize code of the
  two previous classes
* remove Scaling class (it is now a global function)
* add UniformScaling helper class
  (don't use it directly, use the Scaling function)
* add the Scaling global function to simplify the creation
  of scaling objects
There is still a lot to do, in particular about DiagonalProduct for which
the goal is to get rid of the "if()" in the coeff() function. At least
it is not worse than before ! Also need to uptade the tutorial and add more doc.
2009-01-28 16:26:06 +00:00
Gael Guennebaud
da555585e2 Patch from Frank fixing stupid MSVC internal crash 2009-01-28 12:52:26 +00:00
Gael Guennebaud
22bfc77124 move EIGEN_DEPRECATED to the begining of the function (pb with MSVC) 2009-01-28 12:48:19 +00:00
Gael Guennebaud
0f15a8d829 QR: add isInjective(), isSurjective(),
mark isFullRank() deprecated,
    add solve() (mix of Keir's patch and LU::solve())
=> there is big problem with complex which are not working
2009-01-28 09:45:53 +00:00
Gael Guennebaud
cf89d9371a LLT: makes the non positive definite test less strict, but we still need
something better.
2009-01-27 23:01:53 +00:00
Gael Guennebaud
8ce4503494 add support for read/write sub sets of inner vectors (sparse module) 2009-01-27 22:48:17 +00:00
Benoit Jacob
d384671793 now these tests succeed with 10,000 repeats 2009-01-27 20:47:12 +00:00
Gael Guennebaud
4ac8cabf8a fix my previous commit with EIGEN_EMPTY macro bug 2009-01-27 19:08:20 +00:00
Benoit Jacob
9b06e072a5 fix type mismatch caught by new static assert 2009-01-27 17:42:04 +00:00
Gael Guennebaud
d700343632 fix "empty macro arguments are undefined in ISO C90 and ISO C++98"
warning found by gcc-svn
2009-01-27 17:40:14 +00:00
261 changed files with 16169 additions and 3080 deletions

View File

@@ -1,5 +1,5 @@
project(Eigen)
set(EIGEN_VERSION_NUMBER "2.0.2")
set(EIGEN_VERSION_NUMBER "2.0.52-unstable")
#if the svnversion program is absent, this will leave the SVN_REVISION string empty,
#but won't stop CMake.
@@ -17,6 +17,8 @@ endif(EIGEN_SVN_REVISION)
cmake_minimum_required(VERSION 2.6.2)
include(CheckCXXCompilerFlag)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
option(EIGEN_BUILD_TESTS "Build tests" OFF)
@@ -34,7 +36,13 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON)
if(CMAKE_COMPILER_IS_GNUCXX)
if(CMAKE_SYSTEM_NAME MATCHES Linux)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wextra -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fno-exceptions -fno-check-new -fno-common -fstrict-aliasing")
check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA)
if(COMPILER_SUPPORT_WEXTRA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
endif()
if(NOT EIGEN_TEST_LIB)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
endif(NOT EIGEN_TEST_LIB)
@@ -84,14 +92,22 @@ endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
set(INCLUDE_INSTALL_DIR
"${CMAKE_INSTALL_PREFIX}/include/eigen2"
CACHE PATH
"The directory where we install the header files"
FORCE)
add_subdirectory(Eigen)
add_subdirectory(doc)
if(EIGEN_BUILD_TESTS)
include(CTest)
add_subdirectory(test)
endif(EIGEN_BUILD_TESTS)
add_subdirectory(doc)
add_subdirectory(unsupported)
if(EIGEN_BUILD_DEMOS)
add_subdirectory(demos)
@@ -100,3 +116,7 @@ endif(EIGEN_BUILD_DEMOS)
if(EIGEN_BUILD_BTL)
add_subdirectory(bench/btl)
endif(EIGEN_BUILD_BTL)
if(EIGEN_BUILD_TESTS)
ei_testing_print_summary()
endif(EIGEN_BUILD_TESTS)

View File

@@ -16,8 +16,12 @@ namespace Eigen {
* - sin, cos, sqrt, pow, exp, log, square, cube, inverse (reciprocal).
*
* This module also provides various MatrixBase methods, including:
* - \ref MatrixBase::all() "all", \ref MatrixBase::any() "any",
* - \ref MatrixBase::Random() "random matrix initialization"
* - boolean reductions: \ref MatrixBase::all() "all", \ref MatrixBase::any() "any", \ref MatrixBase::count() "count",
* - \ref MatrixBase::Random() "random matrix initialization",
* - a \ref MatrixBase::select() "select" function mimicking the trivariate ?: operator,
* - \ref MatrixBase::colwise() "column-wise" and \ref MatrixBase::rowwise() "row-wise" reductions,
* - \ref MatrixBase::reverse() "matrix reverse",
* - \ref MatrixBase::lpNorm() "generic matrix norm".
*
* \code
* #include <Eigen/Array>
@@ -31,6 +35,8 @@ namespace Eigen {
#include "src/Array/PartialRedux.h"
#include "src/Array/Random.h"
#include "src/Array/Norms.h"
#include "src/Array/Replicate.h"
#include "src/Array/Reverse.h"
} // namespace Eigen

View File

@@ -20,15 +20,9 @@ if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g1 -O2")
endif(CMAKE_COMPILER_IS_GNUCXX)
set(INCLUDE_INSTALL_DIR
"${CMAKE_INSTALL_PREFIX}/include/eigen2"
CACHE PATH
"The directory where we install the header files"
FORCE)
install(FILES
${Eigen_HEADERS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel
)
add_subdirectory(src)

View File

@@ -38,8 +38,8 @@ namespace Eigen {
} // namespace Eigen
#define EIGEN_CHOLESKY_MODULE_INSTANTIATE_TYPE(MATRIXTYPE,PREFIX) \
PREFIX template class Cholesky<MATRIXTYPE>; \
PREFIX template class CholeskyWithoutSquareRoot<MATRIXTYPE>
PREFIX template class LLT<MATRIXTYPE>; \
PREFIX template class LDLT<MATRIXTYPE>
#define EIGEN_CHOLESKY_MODULE_INSTANTIATE(PREFIX) \
EIGEN_CHOLESKY_MODULE_INSTANTIATE_TYPE(Matrix2f,PREFIX); \

View File

@@ -1,3 +1,28 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_CORE_H
#define EIGEN_CORE_H
@@ -11,7 +36,7 @@
// a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64)
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
#endif
#endif
@@ -26,6 +51,10 @@
#define EIGEN_SSE2_BUT_NOT_OLD_GCC
#endif
#ifdef EIGEN_DONT_ALIGN
#define EIGEN_DONT_VECTORIZE
#endif
#ifndef EIGEN_DONT_VECTORIZE
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
#define EIGEN_VECTORIZE
@@ -101,6 +130,7 @@ namespace Eigen {
#if defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#elif defined EIGEN_VECTORIZE_ALTIVEC
#include "src/Core/arch/AltiVec/PacketMath.h"
#endif
@@ -120,6 +150,7 @@ namespace Eigen {
#include "src/Core/MatrixStorage.h"
#include "src/Core/NestByValue.h"
#include "src/Core/ReturnByValue.h"
#include "src/Core/Flagged.h"
#include "src/Core/Matrix.h"
#include "src/Core/Cwise.h"
@@ -127,7 +158,6 @@ namespace Eigen {
#include "src/Core/CwiseUnaryOp.h"
#include "src/Core/CwiseNullaryOp.h"
#include "src/Core/Dot.h"
#include "src/Core/Product.h"
#include "src/Core/DiagonalProduct.h"
#include "src/Core/SolveTriangular.h"
#include "src/Core/MapBase.h"
@@ -136,8 +166,7 @@ namespace Eigen {
#include "src/Core/Minor.h"
#include "src/Core/Transpose.h"
#include "src/Core/DiagonalMatrix.h"
#include "src/Core/DiagonalCoeffs.h"
#include "src/Core/Sum.h"
#include "src/Core/Diagonal.h"
#include "src/Core/Redux.h"
#include "src/Core/Visitor.h"
#include "src/Core/Fuzzy.h"
@@ -145,7 +174,10 @@ namespace Eigen {
#include "src/Core/Swap.h"
#include "src/Core/CommaInitializer.h"
#include "src/Core/Part.h"
#include "src/Core/CacheFriendlyProduct.h"
#include "src/Core/Product.h"
#include "src/Core/products/GeneralMatrixMatrix.h"
#include "src/Core/products/GeneralMatrixVector.h"
#include "src/Core/products/SelfadjointMatrixVector.h"
} // namespace Eigen

View File

@@ -32,6 +32,7 @@ namespace Eigen {
*/
#include "src/Geometry/OrthoMethods.h"
#include "src/Geometry/Homogeneous.h"
#include "src/Geometry/RotationBase.h"
#include "src/Geometry/Rotation2D.h"
#include "src/Geometry/Quaternion.h"
@@ -44,6 +45,10 @@ namespace Eigen {
#include "src/Geometry/ParametrizedLine.h"
#include "src/Geometry/AlignedBox.h"
#if defined EIGEN_VECTORIZE_SSE
#include "src/Geometry/arch/Geometry_SSE.h"
#endif
} // namespace Eigen
#include "src/Core/util/EnableMSVCWarnings.h"

View File

@@ -19,6 +19,7 @@ namespace Eigen {
*/
#include "src/LU/LU.h"
#include "src/LU/PartialLU.h"
#include "src/LU/Determinant.h"
#include "src/LU/Inverse.h"

View File

@@ -22,7 +22,6 @@
#endif
#ifdef EIGEN_TAUCS_SUPPORT
// taucs.h declares a lot of mess
#define isnan
#define finite
@@ -40,26 +39,28 @@
#ifdef max
#undef max
#endif
#ifdef complex
#undef complex
#endif
#endif
#ifdef EIGEN_SUPERLU_SUPPORT
typedef int int_t;
#include "superlu/slu_Cnames.h"
#include "superlu/supermatrix.h"
#include "superlu/slu_util.h"
#include "slu_Cnames.h"
#include "supermatrix.h"
#include "slu_util.h"
namespace SuperLU_S {
#include "superlu/slu_sdefs.h"
#include "slu_sdefs.h"
}
namespace SuperLU_D {
#include "superlu/slu_ddefs.h"
#include "slu_ddefs.h"
}
namespace SuperLU_C {
#include "superlu/slu_cdefs.h"
#include "slu_cdefs.h"
}
namespace SuperLU_Z {
#include "superlu/slu_zdefs.h"
#include "slu_zdefs.h"
}
namespace Eigen { struct SluMatrix; }
#endif
@@ -102,6 +103,8 @@ namespace Eigen {
#include "src/Sparse/SparseFuzzy.h"
#include "src/Sparse/SparseFlagged.h"
#include "src/Sparse/SparseProduct.h"
#include "src/Sparse/SparseDiagonalProduct.h"
#include "src/Sparse/SparseTriangular.h"
#include "src/Sparse/TriangularSolver.h"
#include "src/Sparse/SparseLLT.h"
#include "src/Sparse/SparseLDLT.h"

View File

@@ -1,49 +1,71 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2009 Hauke Heibel <hauke.heibel@googlemail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_STDVECTOR_MODULE_H
#define EIGEN_STDVECTOR_MODULE_H
#if defined(_GLIBCXX_VECTOR) || defined(_VECTOR_)
#error you must include Eigen/StdVector before std::vector
#endif
#ifndef EIGEN_GNUC_AT_LEAST
#ifdef __GNUC__
#define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__>=x && __GNUC_MINOR__>=y) || __GNUC__>x)
#else
#define EIGEN_GNUC_AT_LEAST(x,y) 0
#endif
#endif
#define vector std_vector
#include <vector>
#undef vector
#include "Core"
#include <vector>
namespace Eigen {
template<typename T> class aligned_allocator;
// This one is needed to prevent reimplementing the whole std::vector.
template <class T>
class aligned_allocator_indirection : public aligned_allocator<T>
{
public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef T value_type;
// meta programming to determine if a class has a given member
struct ei_does_not_have_aligned_operator_new_marker_sizeof {int a[1];};
struct ei_has_aligned_operator_new_marker_sizeof {int a[2];};
template<class U>
struct rebind
{
typedef aligned_allocator_indirection<U> other;
};
template<typename ClassType>
struct ei_has_aligned_operator_new {
template<typename T>
static ei_has_aligned_operator_new_marker_sizeof
test(T const *, typename T::ei_operator_new_marker_type const * = 0);
static ei_does_not_have_aligned_operator_new_marker_sizeof
test(...);
// note that the following indirection is needed for gcc-3.3
enum {ret = sizeof(test(static_cast<ClassType*>(0)))
== sizeof(ei_has_aligned_operator_new_marker_sizeof) };
aligned_allocator_indirection() throw() {}
aligned_allocator_indirection(const aligned_allocator_indirection& ) throw() : aligned_allocator<T>() {}
aligned_allocator_indirection(const aligned_allocator<T>& ) throw() {}
template<class U>
aligned_allocator_indirection(const aligned_allocator_indirection<U>& ) throw() {}
template<class U>
aligned_allocator_indirection(const aligned_allocator<U>& ) throw() {}
~aligned_allocator_indirection() throw() {}
};
#ifdef _MSC_VER
// sometimes, MSVC detects, at compile time, that the argument x
// in std::vector::resize(size_t s,T x) won't be aligned and generate an error
// even if this function is never called. Whence this little wrapper.
#define _EIGEN_WORKAROUND_MSVC_STD_VECTOR(T) Eigen::ei_workaround_msvc_std_vector<T>
#define EIGEN_WORKAROUND_MSVC_STD_VECTOR(T) Eigen::ei_workaround_msvc_std_vector<T>
template<typename T> struct ei_workaround_msvc_std_vector : public T
{
inline ei_workaround_msvc_std_vector() : T() {}
@@ -59,7 +81,7 @@ struct ei_has_aligned_operator_new {
#else
#define _EIGEN_WORKAROUND_MSVC_STD_VECTOR(T) T
#define EIGEN_WORKAROUND_MSVC_STD_VECTOR(T) T
#endif
@@ -73,59 +95,71 @@ namespace std {
typedef typename vector_base::allocator_type allocator_type; \
typedef typename vector_base::size_type size_type; \
typedef typename vector_base::iterator iterator; \
explicit vector(const allocator_type& __a = allocator_type()) : vector_base(__a) {} \
typedef typename vector_base::const_iterator const_iterator; \
explicit vector(const allocator_type& a = allocator_type()) : vector_base(a) {} \
template<typename InputIterator> \
vector(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \
: vector_base(first, last, a) {} \
vector(const vector& c) : vector_base(c) {} \
vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \
explicit vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \
vector(iterator start, iterator end) : vector_base(start, end) {} \
vector& operator=(const vector& __x) { \
vector_base::operator=(__x); \
vector& operator=(const vector& x) { \
vector_base::operator=(x); \
return *this; \
}
template<typename T,
typename AllocT = std::allocator<T>,
bool HasAlignedNew = Eigen::ei_has_aligned_operator_new<T>::ret>
class vector : public std::std_vector<T,AllocT>
template<typename T>
class vector<T,Eigen::aligned_allocator<T> >
: public vector<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T),
Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T)> >
{
typedef std_vector<T, AllocT> vector_base;
EIGEN_STD_VECTOR_SPECIALIZATION_BODY
};
template<typename T,typename DummyAlloc>
class vector<T,DummyAlloc,true>
: public std::std_vector<_EIGEN_WORKAROUND_MSVC_STD_VECTOR(T),
Eigen::aligned_allocator<_EIGEN_WORKAROUND_MSVC_STD_VECTOR(T)> >
{
typedef std_vector<_EIGEN_WORKAROUND_MSVC_STD_VECTOR(T),
Eigen::aligned_allocator<_EIGEN_WORKAROUND_MSVC_STD_VECTOR(T)> > vector_base;
typedef vector<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T),
Eigen::aligned_allocator_indirection<EIGEN_WORKAROUND_MSVC_STD_VECTOR(T)> > vector_base;
EIGEN_STD_VECTOR_SPECIALIZATION_BODY
void resize(size_type __new_size)
{ resize(__new_size, T()); }
void resize(size_type new_size)
{ resize(new_size, T()); }
#if defined(_VECTOR_)
#if defined(_VECTOR_)
// workaround MSVC std::vector implementation
void resize(size_type __new_size, const value_type& __x)
{
if (vector_base::size() < __new_size)
vector_base::_Insert_n(vector_base::end(), __new_size - vector_base::size(), __x);
else if (__new_size < vector_base::size())
vector_base::erase(vector_base::begin() + __new_size, vector_base::end());
void resize(size_type new_size, const value_type& x)
{
if (vector_base::size() < new_size)
vector_base::_Insert_n(vector_base::end(), new_size - vector_base::size(), x);
else if (new_size < vector_base::size())
vector_base::erase(vector_base::begin() + new_size, vector_base::end());
}
#elif defined(_GLIBCXX_VECTOR) && EIGEN_GNUC_AT_LEAST(4,1)
void push_back(const value_type& x)
{ vector_base::push_back(x); }
using vector_base::insert;
iterator insert(const_iterator position, const value_type& x)
{ return vector_base::insert(position,x); }
void insert(const_iterator position, size_type new_size, const value_type& x)
{ vector_base::insert(position, new_size, x); }
#elif defined(_GLIBCXX_VECTOR) && EIGEN_GNUC_AT_LEAST(4,1)
// workaround GCC std::vector implementation
// Note that before gcc-4.1 we already have: std::vector::resize(size_type,const T&),
// no no need to workaround !
void resize(size_type __new_size, const value_type& __x)
{
if (__new_size < vector_base::size())
vector_base::_M_erase_at_end(this->_M_impl._M_start + __new_size);
else
vector_base::insert(vector_base::end(), __new_size - vector_base::size(), __x);
}
#else
void resize(size_type new_size, const value_type& x)
{
if (new_size < vector_base::size())
vector_base::_M_erase_at_end(this->_M_impl._M_start + new_size);
else
vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);
}
#elif defined(_GLIBCXX_VECTOR)
using vector_base::resize;
#endif
#else
// default implementation which should always work.
void resize(size_type new_size, const value_type& x)
{
if (new_size < vector_base::size())
vector_base::erase(vector_base::begin() + new_size, vector_base::end());
else if (new_size > vector_base::size())
vector_base::insert(vector_base::end(), new_size - vector_base::size(), x);
}
#endif
};
}

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Array_SRCS "*.h")
INSTALL(FILES
${Eigen_Array_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Array
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Array COMPONENT Devel
)

View File

@@ -82,7 +82,7 @@ Cwise<ExpressionType>::log() const
* Example: \include Cwise_cos.cpp
* Output: \verbinclude Cwise_cos.out
*
* \sa sin(), exp()
* \sa sin(), exp(), EIGEN_FAST_MATH
*/
template<typename ExpressionType>
inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_cos_op)
@@ -99,7 +99,7 @@ Cwise<ExpressionType>::cos() const
* Example: \include Cwise_sin.cpp
* Output: \verbinclude Cwise_sin.out
*
* \sa cos(), exp()
* \sa cos(), exp(), EIGEN_FAST_MATH
*/
template<typename ExpressionType>
inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_sin_op)

View File

@@ -58,10 +58,16 @@ struct ei_functor_traits<ei_scalar_add_op<Scalar> >
*/
template<typename Scalar> struct ei_scalar_sqrt_op EIGEN_EMPTY_STRUCT {
inline const Scalar operator() (const Scalar& a) const { return ei_sqrt(a); }
typedef typename ei_packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return ei_psqrt(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
{ enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::HasSqrt
};
};
/** \internal
*
@@ -73,10 +79,12 @@ struct ei_functor_traits<ei_scalar_sqrt_op<Scalar> >
*/
template<typename Scalar> struct ei_scalar_exp_op EIGEN_EMPTY_STRUCT {
inline const Scalar operator() (const Scalar& a) const { return ei_exp(a); }
typedef typename ei_packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return ei_pexp(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_exp_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasExp }; };
/** \internal
*
@@ -88,10 +96,12 @@ struct ei_functor_traits<ei_scalar_exp_op<Scalar> >
*/
template<typename Scalar> struct ei_scalar_log_op EIGEN_EMPTY_STRUCT {
inline const Scalar operator() (const Scalar& a) const { return ei_log(a); }
typedef typename ei_packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return ei_plog(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_log_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::HasLog }; };
/** \internal
*
@@ -102,11 +112,18 @@ struct ei_functor_traits<ei_scalar_log_op<Scalar> >
* \sa class CwiseUnaryOp, Cwise::cos()
*/
template<typename Scalar> struct ei_scalar_cos_op EIGEN_EMPTY_STRUCT {
inline const Scalar operator() (const Scalar& a) const { return ei_cos(a); }
inline Scalar operator() (const Scalar& a) const { return ei_cos(a); }
typedef typename ei_packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return ei_pcos(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_cos_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::HasCos && EIGEN_FAST_MATH
};
};
/** \internal
*
@@ -118,10 +135,17 @@ struct ei_functor_traits<ei_scalar_cos_op<Scalar> >
*/
template<typename Scalar> struct ei_scalar_sin_op EIGEN_EMPTY_STRUCT {
inline const Scalar operator() (const Scalar& a) const { return ei_sin(a); }
typedef typename ei_packet_traits<Scalar>::type Packet;
inline Packet packetOp(const Packet& a) const { return ei_psin(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sin_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
{
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = ei_packet_traits<Scalar>::HasSin && EIGEN_FAST_MATH
};
};
/** \internal
*

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
@@ -26,7 +26,7 @@
#ifndef EIGEN_PARTIAL_REDUX_H
#define EIGEN_PARTIAL_REDUX_H
/** \array_module \ingroup Array
/** \array_module \ingroup Array_Module
*
* \class PartialReduxExpr
*
@@ -119,6 +119,8 @@ EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
/** \internal */
template <typename BinaryOp, typename Scalar>
@@ -135,7 +137,7 @@ struct ei_member_redux {
const BinaryOp m_functor;
};
/** \array_module \ingroup Array
/** \array_module \ingroup Array_Module
*
* \class PartialRedux
*
@@ -177,8 +179,6 @@ template<typename ExpressionType, int Direction> class PartialRedux
> Type;
};
typedef typename ExpressionType::PlainMatrixType CrossReturnType;
inline PartialRedux(const ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal */
@@ -251,7 +251,7 @@ template<typename ExpressionType, int Direction> class PartialRedux
* \sa MatrixBase::any() */
const typename ReturnType<ei_member_any>::Type any() const
{ return _expression(); }
/** \returns a row (or column) vector expression representing
* the number of \c true coefficients of each respective column (or row).
*
@@ -262,32 +262,68 @@ template<typename ExpressionType, int Direction> class PartialRedux
const PartialReduxExpr<ExpressionType, ei_member_count<int>, Direction> count() const
{ return _expression(); }
/** \returns a 3x3 matrix expression of the cross product
* of each column or row of the referenced expression with the \a other vector.
/** \returns a row (or column) vector expression of the product
* of each column (or row) of the referenced expression.
*
* \geometry_module
* Example: \include PartialRedux_prod.cpp
* Output: \verbinclude PartialRedux_prod.out
*
* \sa MatrixBase::cross() */
template<typename OtherDerived>
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(CrossReturnType,3,3)
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
* \sa MatrixBase::prod() */
const typename ReturnType<ei_member_prod>::Type prod() const
{ return _expression(); }
if(Direction==Vertical)
return (CrossReturnType()
<< _expression().col(0).cross(other),
_expression().col(1).cross(other),
_expression().col(2).cross(other)).finished();
else
return (CrossReturnType()
<< _expression().row(0).cross(other),
_expression().row(1).cross(other),
_expression().row(2).cross(other)).finished();
/** \returns a matrix expression
* where each column (or row) are reversed.
*
* Example: \include PartialRedux_reverse.cpp
* Output: \verbinclude PartialRedux_reverse.out
*
* \sa MatrixBase::reverse() */
const Reverse<ExpressionType, Direction> reverse() const
{
return Reverse<ExpressionType, Direction>( _expression() );
}
const Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
replicate(int factor) const;
template<int Factor>
const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)>
replicate(int factor = Factor) const;
/////////// Geometry module ///////////
const Homogeneous<ExpressionType,Direction> homogeneous() const;
typedef typename ExpressionType::PlainMatrixType CrossReturnType;
template<typename OtherDerived>
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
enum {
HNormalized_Size = Direction==Vertical ? ei_traits<ExpressionType>::RowsAtCompileTime
: ei_traits<ExpressionType>::ColsAtCompileTime,
HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
};
typedef Block<ExpressionType,
Direction==Vertical ? int(HNormalized_SizeMinusOne)
: int(ei_traits<ExpressionType>::RowsAtCompileTime),
Direction==Horizontal ? int(HNormalized_SizeMinusOne)
: int(ei_traits<ExpressionType>::ColsAtCompileTime)>
HNormalized_Block;
typedef Block<ExpressionType,
Direction==Vertical ? 1 : int(ei_traits<ExpressionType>::RowsAtCompileTime),
Direction==Horizontal ? 1 : int(ei_traits<ExpressionType>::ColsAtCompileTime)>
HNormalized_Factors;
typedef CwiseBinaryOp<ei_scalar_quotient_op<typename ei_traits<ExpressionType>::Scalar>,
NestByValue<HNormalized_Block>,
NestByValue<Replicate<NestByValue<HNormalized_Factors>,
Direction==Vertical ? HNormalized_SizeMinusOne : 1,
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> > >
HNormalizedReturnType;
const HNormalizedReturnType hnormalized() const;
protected:
ExpressionTypeNested m_matrix;
};

160
Eigen/src/Array/Replicate.h Normal file
View File

@@ -0,0 +1,160 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_REPLICATE_H
#define EIGEN_REPLICATE_H
/** \nonstableyet
* \class Replicate
*
* \brief Expression of the multiple replication of a matrix or vector
*
* \param MatrixType the type of the object we are replicating
*
* This class represents an expression of the multiple replication of a matrix or vector.
* It is the return type of MatrixBase::replicate() and most of the time
* this is the only way it is used.
*
* \sa MatrixBase::replicate()
*/
template<typename MatrixType,int RowFactor,int ColFactor>
struct ei_traits<Replicate<MatrixType,RowFactor,ColFactor> >
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ?
int(MatrixType::RowsAtCompileTime) + 1 : Dynamic,
ColsPlusOne = (MatrixType::ColsAtCompileTime != Dynamic) ?
int(MatrixType::ColsAtCompileTime) + 1 : Dynamic,
RowsAtCompileTime = RowFactor==Dynamic || MatrixType::RowsAtCompileTime==Dynamic
? Dynamic
: RowFactor * MatrixType::RowsAtCompileTime,
ColsAtCompileTime = ColFactor==Dynamic || MatrixType::ColsAtCompileTime==Dynamic
? Dynamic
: ColFactor * MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = RowsAtCompileTime,
MaxColsAtCompileTime = ColsAtCompileTime,
Flags = _MatrixTypeNested::Flags & HereditaryBits,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
: public MatrixBase<Replicate<MatrixType,RowFactor,ColFactor> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Replicate)
inline Replicate(const MatrixType& matrix)
: m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
{
ei_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
}
inline Replicate(const MatrixType& matrix, int rowFactor, int colFactor)
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
{}
inline int rows() const { return m_matrix.rows() * m_rowFactor.value(); }
inline int cols() const { return m_matrix.cols() * m_colFactor.value(); }
inline Scalar coeff(int row, int col) const
{
return m_matrix.coeff(row%m_matrix.rows(), col%m_matrix.cols());
}
protected:
const typename MatrixType::Nested m_matrix;
const ei_int_if_dynamic<RowFactor> m_rowFactor;
const ei_int_if_dynamic<ColFactor> m_colFactor;
};
/** \nonstableyet
* \return an expression of the replication of \c *this
*
* Example: \include MatrixBase_replicate.cpp
* Output: \verbinclude MatrixBase_replicate.out
*
* \sa PartialRedux::replicate(), MatrixBase::replicate(int,int), class Replicate
*/
template<typename Derived>
template<int RowFactor, int ColFactor>
inline const Replicate<Derived,RowFactor,ColFactor>
MatrixBase<Derived>::replicate() const
{
return derived();
}
/** \nonstableyet
* \return an expression of the replication of \c *this
*
* Example: \include MatrixBase_replicate_int_int.cpp
* Output: \verbinclude MatrixBase_replicate_int_int.out
*
* \sa PartialRedux::replicate(), MatrixBase::replicate<int,int>(), class Replicate
*/
template<typename Derived>
inline const Replicate<Derived,Dynamic,Dynamic>
MatrixBase<Derived>::replicate(int rowFactor,int colFactor) const
{
return Replicate<Derived,Dynamic,Dynamic>(derived(),rowFactor,colFactor);
}
/** \nonstableyet
* \return an expression of the replication of each column (or row) of \c *this
*
* Example: \include DirectionWise_replicate_int.cpp
* Output: \verbinclude DirectionWise_replicate_int.out
*
* \sa PartialRedux::replicate(), MatrixBase::replicate(), class Replicate
*/
template<typename ExpressionType, int Direction>
const Replicate<ExpressionType,(Direction==Vertical?Dynamic:1),(Direction==Horizontal?Dynamic:1)>
PartialRedux<ExpressionType,Direction>::replicate(int factor) const
{
return Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
}
/** \nonstableyet
* \return an expression of the replication of each column (or row) of \c *this
*
* Example: \include DirectionWise_replicate.cpp
* Output: \verbinclude DirectionWise_replicate.out
*
* \sa PartialRedux::replicate(int), MatrixBase::replicate(), class Replicate
*/
template<typename ExpressionType, int Direction>
template<int Factor>
const Replicate<ExpressionType,(Direction==Vertical?Factor:1),(Direction==Horizontal?Factor:1)>
PartialRedux<ExpressionType,Direction>::replicate(int factor) const
{
return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1>
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
}
#endif // EIGEN_REPLICATE_H

202
Eigen/src/Array/Reverse.h Normal file
View File

@@ -0,0 +1,202 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Ricard Marxer <email@ricardmarxer.com>
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_REVERSE_H
#define EIGEN_REVERSE_H
/** \array_module \ingroup Array_Module
*
* \class Reverse
*
* \brief Expression of the reverse of a vector or matrix
*
* \param MatrixType the type of the object of which we are taking the reverse
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and PartialRedux::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), PartialRedux::reverse()
*/
template<typename MatrixType, int Direction>
struct ei_traits<Reverse<MatrixType, Direction> >
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
// let's enable LinearAccess only with vectorization because of the product overhead
LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
? LinearAccessBit : 0,
Flags = (int(_MatrixTypeNested::Flags) & (HereditaryBits | PacketAccessBit | LinearAccess))
| (int(_MatrixTypeNested::Flags)&UpperTriangularBit ? LowerTriangularBit : 0)
| (int(_MatrixTypeNested::Flags)&LowerTriangularBit ? UpperTriangularBit : 0),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};
template<typename PacketScalar, bool ReversePacket> struct ei_reverse_packet_cond
{
static inline PacketScalar run(const PacketScalar& x) { return ei_preverse(x); }
};
template<typename PacketScalar> struct ei_reverse_packet_cond<PacketScalar,false>
{
static inline PacketScalar run(const PacketScalar& x) { return x; }
};
template<typename MatrixType, int Direction> class Reverse
: public MatrixBase<Reverse<MatrixType, Direction> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Reverse)
protected:
enum {
PacketSize = ei_packet_traits<Scalar>::size,
IsRowMajor = Flags & RowMajorBit,
IsColMajor = !IsRowMajor,
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
ReversePacket = (Direction == BothDirections)
|| ((Direction == Vertical) && IsColMajor)
|| ((Direction == Horizontal) && IsRowMajor)
};
typedef ei_reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
public:
inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
inline Scalar& coeffRef(int row, int col)
{
return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
inline const Scalar coeff(int row, int col) const
{
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
inline const Scalar coeff(int index) const
{
return m_matrix.coeff(m_matrix.size() - index - 1);
}
inline Scalar& coeffRef(int index)
{
return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
}
template<int LoadMode>
inline const PacketScalar packet(int row, int col) const
{
return reverse_packet::run(m_matrix.template packet<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
}
template<int LoadMode>
inline void writePacket(int row, int col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
reverse_packet::run(x));
}
template<int LoadMode>
inline const PacketScalar packet(int index) const
{
return ei_preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
}
template<int LoadMode>
inline void writePacket(int index, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, ei_preverse(x));
}
protected:
const typename MatrixType::Nested m_matrix;
};
/** \returns an expression of the reverse of *this.
*
* Example: \include MatrixBase_reverse.cpp
* Output: \verbinclude MatrixBase_reverse.out
*
*/
template<typename Derived>
inline Reverse<Derived, BothDirections>
MatrixBase<Derived>::reverse()
{
return derived();
}
/** This is the const version of reverse(). */
template<typename Derived>
inline const Reverse<Derived, BothDirections>
MatrixBase<Derived>::reverse() const
{
return derived();
}
/** This is the "in place" version of reverse: it reverses \c *this.
*
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional features:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - no temporary object is created (currently there is one created but could be avoided using swap)
* - it allows future optimizations (cache friendliness, etc.)
*
* \sa reverse() */
template<typename Derived>
inline void MatrixBase<Derived>::reverseInPlace()
{
derived() = derived().reverse().eval();
}
#endif // EIGEN_REVERSE_H

View File

@@ -25,7 +25,7 @@
#ifndef EIGEN_SELECT_H
#define EIGEN_SELECT_H
/** \array_module \ingroup Array
/** \array_module \ingroup Array_Module
*
* \class Select
*

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Cholesky_SRCS "*.h")
INSTALL(FILES
${Eigen_Cholesky_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel
)

View File

@@ -2,6 +2,8 @@
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -29,22 +31,27 @@
*
* \class LDLT
*
* \brief Robust Cholesky decomposition of a matrix and associated features
* \brief Robust Cholesky decomposition of a matrix
*
* \param MatrixType the type of the matrix of which we are computing the LDL^T Cholesky decomposition
* \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
*
* This class performs a Cholesky decomposition without square root of a symmetric, positive definite
* matrix A such that A = L D L^* = U^* D U, where L is lower triangular with a unit diagonal
* and D is a diagonal matrix.
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
* is lower triangular with a unit diagonal and D is a diagonal matrix.
*
* Compared to a standard Cholesky decomposition, avoiding the square roots allows for faster and more
* stable computation.
* The decomposition uses pivoting to ensure stability, so that L will have
* zeros in the bottom right rank(A) - n submatrix. Avoiding the square root
* on D also stabilizes the computation.
*
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky decomposition to determine
* whether a system of equations has a solution.
*
* \sa MatrixBase::ldlt(), class LLT
*/
/* THIS PART OF THE DOX IS CURRENTLY DISABLED BECAUSE INACCURATE BECAUSE OF BUG IN THE DECOMPOSITION CODE
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename MatrixType> class LDLT
{
public:
@@ -52,9 +59,13 @@ template<typename MatrixType> class LDLT
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
typedef Matrix<int, 1, MatrixType::RowsAtCompileTime> IntRowVectorType;
LDLT(const MatrixType& matrix)
: m_matrix(matrix.rows(), matrix.cols())
: m_matrix(matrix.rows(), matrix.cols()),
m_p(matrix.rows()),
m_transpositions(matrix.rows())
{
compute(matrix);
}
@@ -62,11 +73,23 @@ template<typename MatrixType> class LDLT
/** \returns the lower triangular matrix L */
inline Part<MatrixType, UnitLowerTriangular> matrixL(void) const { return m_matrix; }
/** \returns the coefficients of the diagonal matrix D */
inline DiagonalCoeffs<MatrixType> vectorD(void) const { return m_matrix.diagonal(); }
/** \returns a vector of integers, whose size is the number of rows of the matrix being decomposed,
* representing the P permutation i.e. the permutation of the rows. For its precise meaning,
* see the examples given in the documentation of class LU.
*/
inline const IntColVectorType& permutationP() const
{
return m_p;
}
/** \returns true if the matrix is positive definite */
inline bool isPositiveDefinite(void) const { return m_isPositiveDefinite; }
/** \returns the coefficients of the diagonal matrix D */
inline Diagonal<MatrixType,0> vectorD(void) const { return m_matrix.diagonal(); }
/** \returns true if the matrix is positive (semidefinite) */
inline bool isPositive(void) const { return m_sign == 1; }
/** \returns true if the matrix is negative (semidefinite) */
inline bool isNegative(void) const { return m_sign == -1; }
template<typename RhsDerived, typename ResDerived>
bool solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDerived> *result) const;
@@ -78,76 +101,119 @@ template<typename MatrixType> class LDLT
protected:
/** \internal
* Used to compute and store the cholesky decomposition A = L D L^* = U^* D U.
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
* The strict upper part is used during the decomposition, the strict lower
* part correspond to the coefficients of L (its diagonal is equal to 1 and
* is not stored), and the diagonal entries correspond to D.
*/
MatrixType m_matrix;
bool m_isPositiveDefinite;
IntColVectorType m_p;
IntColVectorType m_transpositions;
int m_sign;
};
/** Compute / recompute the LLT decomposition A = L D L^* = U^* D U of \a matrix
/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix
*/
template<typename MatrixType>
void LDLT<MatrixType>::compute(const MatrixType& a)
{
assert(a.rows()==a.cols());
ei_assert(a.rows()==a.cols());
const int size = a.rows();
m_matrix.resize(size, size);
m_isPositiveDefinite = true;
const RealScalar eps = ei_sqrt(precision<Scalar>());
if (size<=1)
{
m_matrix = a;
m_matrix = a;
if (size <= 1) {
m_p.setZero();
m_transpositions.setZero();
m_sign = ei_real(a.coeff(0,0))>0 ? 1:-1;
return;
}
// Let's preallocate a temporay vector to evaluate the matrix-vector product into it.
// Unlike the standard LLT decomposition, here we cannot evaluate it to the destination
// matrix because it a sub-row which is not compatible suitable for efficient packet evaluation.
// (at least if we assume the matrix is col-major)
RealScalar cutoff = 0, biggest_in_corner;
// By using a temorary, packet-aligned products are guarenteed. In the LLT
// case this is unnecessary because the diagonal is included and will always
// have optimal alignment.
Matrix<Scalar,MatrixType::RowsAtCompileTime,1> _temporary(size);
// Note that, in this algorithm the rows of the strict upper part of m_matrix is used to store
// column vector, thus the strange .conjugate() and .transpose()...
m_matrix.row(0) = a.row(0).conjugate();
m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
for (int j = 1; j < size; ++j)
for (int j = 0; j < size; ++j)
{
RealScalar tmp = ei_real(a.coeff(j,j) - (m_matrix.row(j).start(j) * m_matrix.col(j).start(j).conjugate()).coeff(0,0));
m_matrix.coeffRef(j,j) = tmp;
// Find largest diagonal element
int index_of_biggest_in_corner;
biggest_in_corner = m_matrix.diagonal().end(size-j).cwise().abs()
.maxCoeff(&index_of_biggest_in_corner);
index_of_biggest_in_corner += j;
if (tmp < eps)
if(j == 0)
{
m_isPositiveDefinite = false;
return;
// The biggest overall is the point of reference to which further diagonals
// are compared; if any diagonal is negligible compared
// to the largest overall, the algorithm bails. This cutoff is suggested
// in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by
// Nicholas J. Higham. Also see "Accuracy and Stability of Numerical
// Algorithms" page 217, also by Higham.
cutoff = ei_abs(machine_epsilon<Scalar>() * size * biggest_in_corner);
m_sign = ei_real(m_matrix.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
}
int endSize = size-j-1;
if (endSize>0)
// Finish early if the matrix is not full rank.
if(biggest_in_corner < cutoff)
{
_temporary.end(endSize) = ( m_matrix.block(j+1,0, endSize, j)
* m_matrix.col(j).start(j).conjugate() ).lazy();
for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
break;
}
m_matrix.row(j).end(endSize) = a.row(j).end(endSize).conjugate()
m_transpositions.coeffRef(j) = index_of_biggest_in_corner;
if(j != index_of_biggest_in_corner)
{
m_matrix.row(j).swap(m_matrix.row(index_of_biggest_in_corner));
m_matrix.col(j).swap(m_matrix.col(index_of_biggest_in_corner));
}
if (j == 0) {
m_matrix.row(0) = m_matrix.row(0).conjugate();
m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
continue;
}
RealScalar Djj = ei_real(m_matrix.coeff(j,j) - (m_matrix.row(j).start(j)
* m_matrix.col(j).start(j).conjugate()).coeff(0,0));
m_matrix.coeffRef(j,j) = Djj;
// Finish early if the matrix is not full rank.
if(ei_abs(Djj) < cutoff)
{
for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
break;
}
int endSize = size - j - 1;
if (endSize > 0) {
_temporary.end(endSize) = ( m_matrix.block(j+1,0, endSize, j)
* m_matrix.col(j).start(j).conjugate() ).lazy();
m_matrix.row(j).end(endSize) = m_matrix.row(j).end(endSize).conjugate()
- _temporary.end(endSize).transpose();
m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / tmp;
m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / Djj;
}
}
// Reverse applied swaps to get P matrix.
for(int k = 0; k < size; ++k) m_p.coeffRef(k) = k;
for(int k = size-1; k >= 0; --k) {
std::swap(m_p.coeffRef(k), m_p.coeffRef(m_transpositions.coeff(k)));
}
}
/** Computes the solution x of \f$ A x = b \f$ using the current decomposition of A.
* The result is stored in \a result
*
* \returns true in case of success, false otherwise.
* \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
*
* In other words, it computes \f$ b = A^{-1} b \f$ with
* \f$ {L^{*}}^{-1} D^{-1} L^{-1} b \f$ from right to left.
* \f$ P^T{L^{*}}^{-1} D^{-1} L^{-1} P b \f$ from right to left.
*
* \sa LDLT::solveInPlace(), MatrixBase::ldlt()
*/
@@ -157,7 +223,7 @@ bool LDLT<MatrixType>
::solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDerived> *result) const
{
const int size = m_matrix.rows();
ei_assert(size==b.rows() && "LLT::solve(): invalid number of rows of the right hand side matrix b");
ei_assert(size==b.rows() && "LDLT::solve(): invalid number of rows of the right hand side matrix b");
*result = b;
return solveInPlace(*result);
}
@@ -166,6 +232,8 @@ bool LDLT<MatrixType>
*
* \param bAndX represents both the right-hand side matrix b and result x.
*
* \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
*
* This version avoids a copy when the right hand side matrix b is not
* needed anymore.
*
@@ -176,17 +244,28 @@ template<typename Derived>
bool LDLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
{
const int size = m_matrix.rows();
ei_assert(size==bAndX.rows());
if (!m_isPositiveDefinite)
return false;
ei_assert(size == bAndX.rows());
// z = P b
for(int i = 0; i < size; ++i) bAndX.row(m_transpositions.coeff(i)).swap(bAndX.row(i));
// y = L^-1 z
matrixL().solveTriangularInPlace(bAndX);
bAndX = (m_matrix.cwise().inverse().template part<Diagonal>() * bAndX).lazy();
// w = D^-1 y
bAndX = (m_matrix.diagonal().cwise().inverse().asDiagonal() * bAndX).lazy();
// u = L^-T w
m_matrix.adjoint().template part<UnitUpperTriangular>().solveTriangularInPlace(bAndX);
// x = P^T u
for (int i = size-1; i >= 0; --i) bAndX.row(m_transpositions.coeff(i)).swap(bAndX.row(i));
return true;
}
/** \cholesky_module
* \returns the Cholesky decomposition without square root of \c *this
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
*/
template<typename Derived>
inline const LDLT<typename MatrixBase<Derived>::PlainMatrixType>

View File

@@ -41,11 +41,16 @@
* and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other
* situations like generalised eigen problems with hermitian matrices.
*
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
* Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices,
* use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
* has a solution.
*
* \sa MatrixBase::llt(), class LDLT
*/
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename MatrixType> class LLT
{
private:
@@ -69,9 +74,6 @@ template<typename MatrixType> class LLT
/** \returns the lower triangular matrix L */
inline Part<MatrixType, LowerTriangular> matrixL(void) const { return m_matrix; }
/** \returns true if the matrix is positive definite */
inline bool isPositiveDefinite(void) const { return m_isPositiveDefinite; }
template<typename RhsDerived, typename ResDerived>
bool solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDerived> *result) const;
@@ -86,7 +88,6 @@ template<typename MatrixType> class LLT
* The strict upper part is not used and even not initialized.
*/
MatrixType m_matrix;
bool m_isPositiveDefinite;
};
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
@@ -97,22 +98,24 @@ void LLT<MatrixType>::compute(const MatrixType& a)
assert(a.rows()==a.cols());
const int size = a.rows();
m_matrix.resize(size, size);
const RealScalar eps = ei_sqrt(precision<Scalar>());
// The biggest overall is the point of reference to which further diagonals
// are compared; if any diagonal is negligible compared
// to the largest overall, the algorithm bails. This cutoff is suggested
// in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by
// Nicholas J. Higham. Also see "Accuracy and Stability of Numerical
// Algorithms" page 217, also by Higham.
const RealScalar cutoff = machine_epsilon<Scalar>() * size * a.diagonal().cwise().abs().maxCoeff();
RealScalar x;
x = ei_real(a.coeff(0,0));
m_isPositiveDefinite = x > eps && ei_isMuchSmallerThan(ei_imag(a.coeff(0,0)), RealScalar(1));
m_matrix.coeffRef(0,0) = ei_sqrt(x);
if(size==1)
return;
m_matrix.col(0).end(size-1) = a.row(0).end(size-1).adjoint() / ei_real(m_matrix.coeff(0,0));
for (int j = 1; j < size; ++j)
{
Scalar tmp = ei_real(a.coeff(j,j)) - m_matrix.row(j).start(j).squaredNorm();
x = ei_real(tmp);
if (x < eps || (!ei_isMuchSmallerThan(ei_imag(tmp), RealScalar(1))))
{
m_isPositiveDefinite = false;
return;
}
x = ei_real(a.coeff(j,j)) - m_matrix.row(j).start(j).squaredNorm();
if (ei_abs(x) < cutoff) continue;
m_matrix.coeffRef(j,j) = x = ei_sqrt(x);
int endSize = size-j-1;
@@ -132,7 +135,7 @@ void LLT<MatrixType>::compute(const MatrixType& a)
/** Computes the solution x of \f$ A x = b \f$ using the current decomposition of A.
* The result is stored in \a result
*
* \returns true in case of success, false otherwise.
* \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
*
* In other words, it computes \f$ b = A^{-1} b \f$ with
* \f$ {L^{*}}^{-1} L^{-1} b \f$ from right to left.
@@ -155,6 +158,8 @@ bool LLT<MatrixType>::solve(const MatrixBase<RhsDerived> &b, MatrixBase<ResDeriv
*
* \param bAndX represents both the right-hand side matrix b and result x.
*
* \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
*
* This version avoids a copy when the right hand side matrix b is not
* needed anymore.
*
@@ -166,8 +171,6 @@ bool LLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
{
const int size = m_matrix.rows();
ei_assert(size==bAndX.rows());
if (!m_isPositiveDefinite)
return false;
matrixL().solveTriangularInPlace(bAndX);
m_matrix.adjoint().template part<UpperTriangular>().solveTriangularInPlace(bAndX);
return true;

View File

@@ -61,7 +61,6 @@
*
* \sa MatrixBase::block(int,int,int,int), MatrixBase::block(int,int), class VectorBlock
*/
template<typename MatrixType, int BlockRows, int BlockCols, int _PacketAccess, int _DirectAccessStatus>
struct ei_traits<Block<MatrixType, BlockRows, BlockCols, _PacketAccess, _DirectAccessStatus> >
{
@@ -69,8 +68,8 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols, _PacketAccess, _DirectA
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
enum{
RowsAtCompileTime = ei_traits<MatrixType>::RowsAtCompileTime == 1 ? 1 : BlockRows,
ColsAtCompileTime = ei_traits<MatrixType>::ColsAtCompileTime == 1 ? 1 : BlockCols,
RowsAtCompileTime = BlockRows,
ColsAtCompileTime = BlockCols,
MaxRowsAtCompileTime = RowsAtCompileTime == 1 ? 1
: (BlockRows==Dynamic ? int(ei_traits<MatrixType>::MaxRowsAtCompileTime) : BlockRows),
MaxColsAtCompileTime = ColsAtCompileTime == 1 ? 1
@@ -153,7 +152,7 @@ template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess, in
.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}
inline const Scalar coeff(int row, int col) const
inline const CoeffReturnType coeff(int row, int col) const
{
return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value());
}
@@ -165,7 +164,7 @@ template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess, in
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}
inline const Scalar coeff(int index) const
inline const CoeffReturnType coeff(int index) const
{
return m_matrix
.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),

View File

@@ -2,8 +2,16 @@ FILE(GLOB Eigen_Core_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel
)
FILE(GLOB Eigen_Core_Product_SRCS "products/*.h")
INSTALL(FILES
${Eigen_Core_Product_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/products COMPONENT Devel
)
ADD_SUBDIRECTORY(util)
ADD_SUBDIRECTORY(arch)

View File

@@ -40,7 +40,7 @@
* \sa operator()(int,int) const, coeffRef(int,int), coeff(int) const
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::coeff(int row, int col) const
{
ei_internal_assert(row >= 0 && row < rows()
@@ -53,7 +53,7 @@ EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived
* \sa operator()(int,int), operator[](int) const
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::operator()(int row, int col) const
{
ei_assert(row >= 0 && row < rows()
@@ -112,7 +112,7 @@ EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
* \sa operator[](int) const, coeffRef(int), coeff(int,int) const
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::coeff(int index) const
{
ei_internal_assert(index >= 0 && index < size());
@@ -127,7 +127,7 @@ EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived
* z() const, w() const
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::operator[](int index) const
{
ei_assert(index >= 0 && index < size());
@@ -144,7 +144,7 @@ EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived
* z() const, w() const
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::operator()(int index) const
{
ei_assert(index >= 0 && index < size());
@@ -205,22 +205,22 @@ EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar& MatrixBase<Derived>
/** equivalent to operator[](0). */
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::x() const { return (*this)[0]; }
/** equivalent to operator[](1). */
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::y() const { return (*this)[1]; }
/** equivalent to operator[](2). */
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::z() const { return (*this)[2]; }
/** equivalent to operator[](3). */
template<typename Derived>
EIGEN_STRONG_INLINE const typename ei_traits<Derived>::Scalar MatrixBase<Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::CoeffReturnType MatrixBase<Derived>
::w() const { return (*this)[3]; }
/** equivalent to operator[](0). */

View File

@@ -64,7 +64,7 @@
* It is the return type of MatrixBase::cwise()
* and most of the time this is the only way it is used.
*
* Note that some methods are defined in the \ref Array module.
* Note that some methods are defined in the \ref Array_Module array module.
*
* Example: \include MatrixBase_cwise_const.cpp
* Output: \verbinclude MatrixBase_cwise_const.out

View File

@@ -188,7 +188,11 @@ MatrixBase<Derived>::imag() const { return derived(); }
*/
template<typename Derived>
template<typename NewType>
EIGEN_STRONG_INLINE const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
EIGEN_STRONG_INLINE
typename ei_cast_return_type<
Derived,
const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
>::type
MatrixBase<Derived>::cast() const
{
return derived();

187
Eigen/src/Core/Diagonal.h Normal file
View File

@@ -0,0 +1,187 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_DIAGONAL_H
#define EIGEN_DIAGONAL_H
/** \class Diagonal
*
* \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
*
* \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
* \param Index the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
* A positive value means a superdiagonal, a negative value means a subdiagonal.
* You can also use Dynamic so the index can be set at runtime.
*
* The matrix is not required to be square.
*
* This class represents an expression of the main diagonal, or any sub/super diagonal
* of a square matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(int) and most of the
* time this is the only way it is used.
*
* \sa MatrixBase::diagonal(), MatrixBase::diagonal(int)
*/
template<typename MatrixType, int Index>
struct ei_traits<Diagonal<MatrixType,Index> >
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
enum {
AbsIndex = Index<0 ? -Index : Index, // only used if Index != Dynamic
RowsAtCompileTime = (int(Index) == Dynamic || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
: (EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime,
MatrixType::ColsAtCompileTime) - AbsIndex),
ColsAtCompileTime = 1,
MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
: (EIGEN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime,
MatrixType::MaxColsAtCompileTime) - AbsIndex),
MaxColsAtCompileTime = 1,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};
template<typename MatrixType, int Index> class Diagonal
: public MatrixBase<Diagonal<MatrixType, Index> >
{
// some compilers may fail to optimize std::max etc in case of compile-time constants...
EIGEN_STRONG_INLINE int absIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
EIGEN_STRONG_INLINE int rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
EIGEN_STRONG_INLINE int colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Diagonal)
inline Diagonal(const MatrixType& matrix, int index = Index) : m_matrix(matrix), m_index(index) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
inline int rows() const{ return m_matrix.diagonalSize() - absIndex(); }
inline int cols() const { return 1; }
inline Scalar& coeffRef(int row, int)
{
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
}
inline const Scalar coeff(int row, int) const
{
return m_matrix.coeff(row+rowOffset(), row+colOffset());
}
inline Scalar& coeffRef(int index)
{
return m_matrix.const_cast_derived().coeffRef(index+rowOffset(), index+colOffset());
}
inline const Scalar coeff(int index) const
{
return m_matrix.coeff(index+rowOffset(), index+colOffset());
}
protected:
const typename MatrixType::Nested m_matrix;
const ei_int_if_dynamic<Index> m_index;
};
/** \returns an expression of the main diagonal of the matrix \c *this
*
* \c *this is not required to be square.
*
* Example: \include MatrixBase_diagonal.cpp
* Output: \verbinclude MatrixBase_diagonal.out
*
* \sa class Diagonal */
template<typename Derived>
inline Diagonal<Derived, 0>
MatrixBase<Derived>::diagonal()
{
return Diagonal<Derived, 0>(derived());
}
/** This is the const version of diagonal(). */
template<typename Derived>
inline const Diagonal<Derived, 0>
MatrixBase<Derived>::diagonal() const
{
return Diagonal<Derived, 0>(derived());
}
/** \returns an expression of the \a Index-th sub or super diagonal of the matrix \c *this
*
* \c *this is not required to be square.
*
* The template parameter \a Index represent a super diagonal if \a Index > 0
* and a sub diagonal otherwise. \a Index == 0 is equivalent to the main diagonal.
*
* Example: \include MatrixBase_diagonal_int.cpp
* Output: \verbinclude MatrixBase_diagonal_int.out
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
inline Diagonal<Derived, Dynamic>
MatrixBase<Derived>::diagonal(int index)
{
return Diagonal<Derived, Dynamic>(derived(), index);
}
/** This is the const version of diagonal(int). */
template<typename Derived>
inline const Diagonal<Derived, Dynamic>
MatrixBase<Derived>::diagonal(int index) const
{
return Diagonal<Derived, Dynamic>(derived(), index);
}
/** \returns an expression of the \a Index-th sub or super diagonal of the matrix \c *this
*
* \c *this is not required to be square.
*
* The template parameter \a Index represent a super diagonal if \a Index > 0
* and a sub diagonal otherwise. \a Index == 0 is equivalent to the main diagonal.
*
* Example: \include MatrixBase_diagonal_template_int.cpp
* Output: \verbinclude MatrixBase_diagonal_template_int.out
*
* \sa MatrixBase::diagonal(), class Diagonal */
template<typename Derived>
template<int Index>
inline Diagonal<Derived,Index>
MatrixBase<Derived>::diagonal()
{
return Diagonal<Derived,Index>(derived());
}
/** This is the const version of diagonal<int>(). */
template<typename Derived>
template<int Index>
inline const Diagonal<Derived,Index>
MatrixBase<Derived>::diagonal() const
{
return Diagonal<Derived,Index>(derived());
}
#endif // EIGEN_DIAGONAL_H

View File

@@ -1,124 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_DIAGONALCOEFFS_H
#define EIGEN_DIAGONALCOEFFS_H
/** \class DiagonalCoeffs
*
* \brief Expression of the main diagonal of a matrix
*
* \param MatrixType the type of the object in which we are taking the main diagonal
*
* The matrix is not required to be square.
*
* This class represents an expression of the main diagonal of a square matrix.
* It is the return type of MatrixBase::diagonal() and most of the time this is
* the only way it is used.
*
* \sa MatrixBase::diagonal()
*/
template<typename MatrixType>
struct ei_traits<DiagonalCoeffs<MatrixType> >
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsAtCompileTime = int(MatrixType::SizeAtCompileTime) == Dynamic ? Dynamic
: EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime,
MatrixType::ColsAtCompileTime),
ColsAtCompileTime = 1,
MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
: EIGEN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime,
MatrixType::MaxColsAtCompileTime),
MaxColsAtCompileTime = 1,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};
template<typename MatrixType> class DiagonalCoeffs
: public MatrixBase<DiagonalCoeffs<MatrixType> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalCoeffs)
inline DiagonalCoeffs(const MatrixType& matrix) : m_matrix(matrix) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(DiagonalCoeffs)
inline int rows() const { return std::min(m_matrix.rows(), m_matrix.cols()); }
inline int cols() const { return 1; }
inline Scalar& coeffRef(int row, int)
{
return m_matrix.const_cast_derived().coeffRef(row, row);
}
inline const Scalar coeff(int row, int) const
{
return m_matrix.coeff(row, row);
}
inline Scalar& coeffRef(int index)
{
return m_matrix.const_cast_derived().coeffRef(index, index);
}
inline const Scalar coeff(int index) const
{
return m_matrix.coeff(index, index);
}
protected:
const typename MatrixType::Nested m_matrix;
};
/** \returns an expression of the main diagonal of the matrix \c *this
*
* \c *this is not required to be square.
*
* Example: \include MatrixBase_diagonal.cpp
* Output: \verbinclude MatrixBase_diagonal.out
*
* \sa class DiagonalCoeffs */
template<typename Derived>
inline DiagonalCoeffs<Derived>
MatrixBase<Derived>::diagonal()
{
return DiagonalCoeffs<Derived>(derived());
}
/** This is the const version of diagonal(). */
template<typename Derived>
inline const DiagonalCoeffs<Derived>
MatrixBase<Derived>::diagonal() const
{
return DiagonalCoeffs<Derived>(derived());
}
#endif // EIGEN_DIAGONALCOEFFS_H

View File

@@ -1,6 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
@@ -25,57 +26,92 @@
#ifndef EIGEN_DIAGONALMATRIX_H
#define EIGEN_DIAGONALMATRIX_H
/** \class DiagonalMatrix
* \nonstableyet
*
* \brief Expression of a diagonal matrix
*
* \param CoeffsVectorType the type of the vector of diagonal coefficients
*
* This class is an expression of a diagonal matrix with given vector of diagonal
* coefficients. It is the return
* type of MatrixBase::diagonal(const OtherDerived&) and most of the time this is
* the only way it is used.
*
* \sa MatrixBase::diagonal(const OtherDerived&)
*/
template<typename CoeffsVectorType>
struct ei_traits<DiagonalMatrix<CoeffsVectorType> >
{
typedef typename CoeffsVectorType::Scalar Scalar;
typedef typename ei_nested<CoeffsVectorType>::type CoeffsVectorTypeNested;
typedef typename ei_unref<CoeffsVectorTypeNested>::type _CoeffsVectorTypeNested;
enum {
RowsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
ColsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
MaxRowsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
MaxColsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
Flags = (_CoeffsVectorTypeNested::Flags & HereditaryBits) | Diagonal,
CoeffReadCost = _CoeffsVectorTypeNested::CoeffReadCost
};
};
template<typename CoeffsVectorType>
class DiagonalMatrix : ei_no_assignment_operator,
public MatrixBase<DiagonalMatrix<CoeffsVectorType> >
template<typename CoeffsVectorType, typename Derived>
class DiagonalMatrixBase : ei_no_assignment_operator,
public MatrixBase<Derived>
{
public:
typedef MatrixBase<Derived> Base;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;
using Base::derived;
typedef typename ei_cleantype<CoeffsVectorType>::type _CoeffsVectorType;
EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalMatrix)
protected:
// needed to evaluate a DiagonalMatrix<Xpr> to a DiagonalMatrix<NestByValue<Vector> >
template<typename OtherCoeffsVectorType>
inline DiagonalMatrix(const DiagonalMatrix<OtherCoeffsVectorType>& other) : m_coeffs(other.diagonal())
// MSVC gets crazy if we define default parameters
template<typename OtherDerived, bool IsVector, bool IsDiagonal> struct construct_from_expression;
// = vector
template<typename OtherDerived>
struct construct_from_expression<OtherDerived,true,false>
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(CoeffsVectorType);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherCoeffsVectorType);
ei_assert(m_coeffs.size() > 0);
static void run(Derived& dst, const OtherDerived& src)
{ dst.diagonal() = src; }
};
// = diagonal expression
template<typename OtherDerived, bool IsVector>
struct construct_from_expression<OtherDerived,IsVector,true>
{
static void run(Derived& dst, const OtherDerived& src)
{ dst.diagonal() = src.diagonal(); }
};
/** Default constructor without initialization */
inline DiagonalMatrixBase() {}
/** Constructs a diagonal matrix with given dimension */
inline DiagonalMatrixBase(int dim) : m_coeffs(dim) {}
/** Generic constructor from an expression */
template<typename OtherDerived>
inline DiagonalMatrixBase(const MatrixBase<OtherDerived>& other)
{
construct_from_expression<OtherDerived,OtherDerived::IsVectorAtCompileTime,ei_is_diagonal<OtherDerived>::ret>
::run(derived(),other.derived());
}
template<typename NewType,int dummy=0>
struct cast_selector {
typedef const DiagonalMatrixWrapper<NestByValue<CwiseUnaryOp<ei_scalar_cast_op<Scalar, NewType>, _CoeffsVectorType> > > return_type;
inline static return_type run(const DiagonalMatrixBase& d) {
return d.m_coeffs.template cast<NewType>().nestByValue().asDiagonal();
}
};
template<int dummy>
struct cast_selector<Scalar,dummy> {
typedef const Derived& return_type;
inline static return_type run(const DiagonalMatrixBase& d) {
return d.derived();
}
};
public:
inline DiagonalMatrixBase(const _CoeffsVectorType& coeffs) : m_coeffs(coeffs)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_CoeffsVectorType);
ei_assert(coeffs.size() > 0);
}
inline DiagonalMatrix(const CoeffsVectorType& coeffs) : m_coeffs(coeffs)
template<typename NewType>
inline typename cast_selector<NewType,0>::return_type
cast() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(CoeffsVectorType);
ei_assert(coeffs.size() > 0);
return cast_selector<NewType,0>::run(*this);
}
/** Assignment operator.
* The right-hand-side \a other must be either a vector representing the diagonal
* coefficients or a diagonal matrix expression.
*/
template<typename OtherDerived>
inline Derived& operator=(const MatrixBase<OtherDerived>& other)
{
construct_from_expression<OtherDerived,OtherDerived::IsVectorAtCompileTime,ei_is_diagonal<OtherDerived>::ret>
::run(derived(),other);
return derived();
}
inline int rows() const { return m_coeffs.size(); }
@@ -86,13 +122,153 @@ class DiagonalMatrix : ei_no_assignment_operator,
return row == col ? m_coeffs.coeff(row) : static_cast<Scalar>(0);
}
inline const CoeffsVectorType& diagonal() const { return m_coeffs; }
inline Scalar& coeffRef(int row, int col)
{
ei_assert(row==col);
return m_coeffs.coeffRef(row);
}
inline _CoeffsVectorType& diagonal() { return m_coeffs; }
inline const _CoeffsVectorType& diagonal() const { return m_coeffs; }
protected:
const typename CoeffsVectorType::Nested m_coeffs;
CoeffsVectorType m_coeffs;
};
/** \nonstableyet
/** \class DiagonalMatrix
* \nonstableyet
*
* \brief Represent a diagonal matrix with its storage
*
* \param _Scalar the type of coefficients
* \param _Size the dimension of the matrix
*
* \sa class Matrix
*/
template<typename _Scalar,int _Size>
struct ei_traits<DiagonalMatrix<_Scalar,_Size> > : ei_traits<Matrix<_Scalar,_Size,_Size> >
{
enum {
Flags = (ei_traits<Matrix<_Scalar,_Size,_Size> >::Flags & HereditaryBits) | DiagonalBits
};
};
template<typename _Scalar, int _Size>
class DiagonalMatrix
: public DiagonalMatrixBase<Matrix<_Scalar,_Size,1>, DiagonalMatrix<_Scalar,_Size> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalMatrix)
typedef DiagonalMatrixBase<Matrix<_Scalar,_Size,1>, DiagonalMatrix<_Scalar,_Size> > DiagonalBase;
protected:
typedef Matrix<_Scalar,_Size,1> CoeffVectorType;
using DiagonalBase::m_coeffs;
public:
/** Default constructor without initialization */
inline DiagonalMatrix() : DiagonalBase()
{}
/** Constructs a diagonal matrix with given dimension */
inline DiagonalMatrix(int dim) : DiagonalBase(dim)
{}
/** 2D only */
inline DiagonalMatrix(const Scalar& sx, const Scalar& sy)
{
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(DiagonalMatrix,2,2);
m_coeffs.x() = sx;
m_coeffs.y() = sy;
}
/** 3D only */
inline DiagonalMatrix(const Scalar& sx, const Scalar& sy, const Scalar& sz)
{
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(DiagonalMatrix,3,3);
m_coeffs.x() = sx;
m_coeffs.y() = sy;
m_coeffs.z() = sz;
}
/** copy constructor */
inline DiagonalMatrix(const DiagonalMatrix& other) : DiagonalBase(other.m_coeffs)
{}
/** generic constructor from expression */
template<typename OtherDerived>
explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : DiagonalBase(other)
{}
DiagonalMatrix& operator=(const DiagonalMatrix& other)
{
m_coeffs = other.m_coeffs;
return *this;
}
template<typename OtherDerived>
DiagonalMatrix& operator=(const MatrixBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT(ei_is_diagonal<OtherDerived>::ret, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX);
m_coeffs = other.diagonal();
return *this;
}
inline void resize(int size)
{
m_coeffs.resize(size);
}
inline void resize(int rows, int cols)
{
ei_assert(rows==cols && "a diagonal matrix must be square");
m_coeffs.resize(rows);
}
inline void setZero() { m_coeffs.setZero(); }
};
/** \class DiagonalMatrixWrapper
* \nonstableyet
*
* \brief Expression of a diagonal matrix
*
* \param CoeffsVectorType the type of the vector of diagonal coefficients
*
* This class is an expression of a diagonal matrix with given vector of diagonal
* coefficients. It is the return type of MatrixBase::diagonal(const OtherDerived&)
* and most of the time this is the only way it is used.
*
* \sa class DiagonalMatrixBase, class DiagonalMatrix, MatrixBase::asDiagonal()
*/
template<typename CoeffsVectorType>
struct ei_traits<DiagonalMatrixWrapper<CoeffsVectorType> >
{
typedef typename CoeffsVectorType::Scalar Scalar;
typedef typename ei_nested<CoeffsVectorType>::type CoeffsVectorTypeNested;
typedef typename ei_unref<CoeffsVectorTypeNested>::type _CoeffsVectorTypeNested;
enum {
RowsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
ColsAtCompileTime = CoeffsVectorType::SizeAtCompileTime,
MaxRowsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
MaxColsAtCompileTime = CoeffsVectorType::MaxSizeAtCompileTime,
Flags = (_CoeffsVectorTypeNested::Flags & HereditaryBits) | DiagonalBits,
CoeffReadCost = _CoeffsVectorTypeNested::CoeffReadCost
};
};
template<typename CoeffsVectorType>
class DiagonalMatrixWrapper
: public DiagonalMatrixBase<typename CoeffsVectorType::Nested, DiagonalMatrixWrapper<CoeffsVectorType> >
{
typedef typename CoeffsVectorType::Nested CoeffsVectorTypeNested;
typedef DiagonalMatrixBase<CoeffsVectorTypeNested, DiagonalMatrixWrapper<CoeffsVectorType> > DiagonalBase;
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(DiagonalMatrixWrapper)
inline DiagonalMatrixWrapper(const CoeffsVectorType& coeffs) : DiagonalBase(coeffs)
{}
};
/** \nonstableyet
* \returns an expression of a diagonal matrix with *this as vector of diagonal coefficients
*
* \only_for_vectors
@@ -105,13 +281,13 @@ class DiagonalMatrix : ei_no_assignment_operator,
* \sa class DiagonalMatrix, isDiagonal()
**/
template<typename Derived>
inline const DiagonalMatrix<Derived>
inline const DiagonalMatrixWrapper<Derived>
MatrixBase<Derived>::asDiagonal() const
{
return derived();
}
/** \nonstableyet
/** \nonstableyet
* \returns true if *this is approximately equal to a diagonal matrix,
* within the precision given by \a prec.
*

View File

@@ -30,9 +30,9 @@
* Unlike ei_nested, if the argument is a DiagonalMatrix and if it must be evaluated,
* then it evaluated to a DiagonalMatrix having its own argument evaluated.
*/
template<typename T, int N> struct ei_nested_diagonal : ei_nested<T,N> {};
template<typename T, int N> struct ei_nested_diagonal<DiagonalMatrix<T>,N >
: ei_nested<DiagonalMatrix<T>, N, DiagonalMatrix<NestByValue<typename ei_plain_matrix_type<T>::type> > >
template<typename T, int N, bool IsDiagonal = ei_is_diagonal<T>::ret> struct ei_nested_diagonal : ei_nested<T,N> {};
template<typename T, int N> struct ei_nested_diagonal<T,N,true>
: ei_nested<T, N, DiagonalMatrix<typename T::Scalar, EIGEN_ENUM_MIN(T::RowsAtCompileTime,T::ColsAtCompileTime)> >
{};
// specialization of ProductReturnType
@@ -61,8 +61,8 @@ struct ei_traits<Product<LhsNested, RhsNested, DiagonalProduct> >
MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
LhsIsDiagonal = (_LhsNested::Flags&Diagonal)==Diagonal,
RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal,
LhsIsDiagonal = ei_is_diagonal<_LhsNested>::ret,
RhsIsDiagonal = ei_is_diagonal<_RhsNested>::ret,
CanVectorizeRhs = (!RhsIsDiagonal) && (RhsFlags & RowMajorBit) && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
@@ -86,7 +86,7 @@ template<typename LhsNested, typename RhsNested> class Product<LhsNested, RhsNes
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
enum {
RhsIsDiagonal = (_RhsNested::Flags&Diagonal)==Diagonal
RhsIsDiagonal = ei_is_diagonal<_RhsNested>::ret
};
public:

View File

@@ -266,7 +266,10 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
ei_assert(size() == other.size());
return ei_dot_impl<Derived, OtherDerived>::run(derived(), other.derived());
// dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) )
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
typedef typename ei_cleantype<typename OtherDerived::Nested>::type OtherNested;
return ei_dot_impl<ThisNested, OtherNested>::run(derived(), other.derived());
}
/** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself.
@@ -289,6 +292,18 @@ inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<
return ei_sqrt(squaredNorm());
}
/** \returns the \em l2 norm of \c *this using a numerically more stable
* algorithm.
*
* \sa norm(), dot(), squaredNorm()
*/
template<typename Derived>
inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
return this->cwise().abs().redux(ei_scalar_hypot_op<RealScalar>());
}
/** \returns an expression of the quotient of *this by its own norm.
*
* \only_for_vectors

View File

@@ -37,6 +37,9 @@ template<typename Scalar> struct ei_scalar_sum_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_padd(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
{ return ei_predux(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
@@ -56,6 +59,9 @@ template<typename Scalar> struct ei_scalar_product_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_pmul(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
{ return ei_predux_mul(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
@@ -75,6 +81,9 @@ template<typename Scalar> struct ei_scalar_min_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_pmin(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
{ return ei_predux_min(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
@@ -94,6 +103,9 @@ template<typename Scalar> struct ei_scalar_max_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_pmax(a,b); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
{ return ei_predux_max(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
@@ -103,6 +115,28 @@ struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
};
};
/** \internal
* \brief Template functor to compute the hypot of two scalars
*
* \sa MatrixBase::stableNorm(), class Redux
*/
template<typename Scalar> struct ei_scalar_hypot_op EIGEN_EMPTY_STRUCT {
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
// typedef typename NumTraits<T>::Real RealScalar;
// RealScalar _x = ei_abs(x);
// RealScalar _y = ei_abs(y);
Scalar p = std::max(_x, _y);
Scalar q = std::min(_x, _y);
Scalar qp = q/p;
return p * ei_sqrt(Scalar(1) + qp*qp);
}
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_hypot_op<Scalar> > {
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
};
// other binary functors:
@@ -156,10 +190,16 @@ struct ei_functor_traits<ei_scalar_quotient_op<Scalar> > {
*/
template<typename Scalar> struct ei_scalar_opposite_op EIGEN_EMPTY_STRUCT {
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
{ return ei_pnegate(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false }; };
{ enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = int(ei_packet_traits<Scalar>::size)>1 };
};
/** \internal
* \brief Template functor to compute the absolute value of a scalar
@@ -169,13 +209,16 @@ struct ei_functor_traits<ei_scalar_opposite_op<Scalar> >
template<typename Scalar> struct ei_scalar_abs_op EIGEN_EMPTY_STRUCT {
typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return ei_abs(a); }
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const
{ return ei_pabs(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = false // this could actually be vectorized with SSSE3.
PacketAccess = int(ei_packet_traits<Scalar>::size)>1
};
};
@@ -265,7 +308,7 @@ struct ei_functor_traits<ei_scalar_imag_op<Scalar> >
* indeed it seems better to declare m_other as a PacketScalar and do the ei_pset1() once
* in the constructor. However, in practice:
* - GCC does not like m_other as a PacketScalar and generate a load every time it needs it
* - one the other hand GCC is able to moves the ei_pset1() away the loop :)
* - on the other hand GCC is able to moves the ei_pset1() away the loop :)
* - simpler code ;)
* (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
*/
@@ -284,6 +327,18 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_multiple_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = ei_packet_traits<Scalar>::size>1 }; };
template<typename Scalar1, typename Scalar2>
struct ei_scalar_multiple2_op {
typedef typename ei_scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { }
EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
const Scalar2 m_other;
};
template<typename Scalar1,typename Scalar2>
struct ei_functor_traits<ei_scalar_multiple2_op<Scalar1,Scalar2> >
{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
template<typename Scalar, bool HasFloatingPoint>
struct ei_scalar_quotient1_impl {
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -324,6 +379,10 @@ struct ei_scalar_quotient1_op : ei_scalar_quotient1_impl<Scalar, NumTraits<Scala
EIGEN_STRONG_INLINE ei_scalar_quotient1_op(const Scalar& other)
: ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint >(other) {}
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_quotient1_op<Scalar> >
: ei_functor_traits<ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint> >
{};
// nullary functors

View File

@@ -34,6 +34,47 @@
* of generic vectorized code.
*/
struct ei_default_packet_traits
{
enum {
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasNegate = 1,
HasAbs = 1,
HasMin = 1,
HasMax = 1,
HasDiv = 0,
HasSqrt = 0,
HasExp = 0,
HasLog = 0,
HasPow = 0,
HasSin = 0,
HasCos = 0,
HasTan = 0,
HasASin = 0,
HasACos = 0,
HasATan = 0
};
};
template<typename T> struct ei_packet_traits : ei_default_packet_traits
{
typedef T type;
enum {size=1};
enum {
HasAdd = 0,
HasSub = 0,
HasMul = 0,
HasNegate = 0,
HasAbs = 0,
HasMin = 0,
HasMax = 0
};
};
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> inline Packet
ei_padd(const Packet& a,
@@ -44,6 +85,10 @@ template<typename Packet> inline Packet
ei_psub(const Packet& a,
const Packet& b) { return a-b; }
/** \internal \returns -a (coeff-wise) */
template<typename Packet> inline Packet
ei_pnegate(const Packet& a) { return -a; }
/** \internal \returns a * b (coeff-wise) */
template<typename Packet> inline Packet
ei_pmul(const Packet& a,
@@ -64,6 +109,26 @@ template<typename Packet> inline Packet
ei_pmax(const Packet& a,
const Packet& b) { return std::max(a, b); }
/** \internal \returns the absolute value of \a a */
template<typename Packet> inline Packet
ei_pabs(const Packet& a) { return ei_abs(a); }
/** \internal \returns the bitwise and of \a a and \a b */
template<typename Packet> inline Packet
ei_pand(const Packet& a, const Packet& b) { return a & b; }
/** \internal \returns the bitwise or of \a a and \a b */
template<typename Packet> inline Packet
ei_por(const Packet& a, const Packet& b) { return a | b; }
/** \internal \returns the bitwise xor of \a a and \a b */
template<typename Packet> inline Packet
ei_pxor(const Packet& a, const Packet& b) { return a ^ b; }
/** \internal \returns the bitwise andnot of \a a and \a b */
template<typename Packet> inline Packet
ei_pandnot(const Packet& a, const Packet& b) { return a & (!b); }
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pload(const Scalar* from) { return *from; }
@@ -96,6 +161,40 @@ ei_preduxp(const Packet* vecs) { return vecs[0]; }
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet& a)
{ return a; }
/** \internal \returns the product of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a)
{ return a; }
/** \internal \returns the min of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_min(const Packet& a)
{ return a; }
/** \internal \returns the max of the elements of \a a*/
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_max(const Packet& a)
{ return a; }
/** \internal \returns the reversed elements of \a a*/
template<typename Packet> inline Packet ei_preverse(const Packet& a)
{ return a; }
/**************************
* Special math functions
***************************/
/** \internal \returns the sin of \a a (coeff-wise) */
template<typename Packet> inline static Packet ei_psin(Packet a) { return ei_sin(a); }
/** \internal \returns the cos of \a a (coeff-wise) */
template<typename Packet> inline static Packet ei_pcos(Packet a) { return ei_cos(a); }
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> inline static Packet ei_pexp(Packet a) { return ei_exp(a); }
/** \internal \returns the log of \a a (coeff-wise) */
template<typename Packet> inline static Packet ei_plog(Packet a) { return ei_log(a); }
/** \internal \returns the square-root of \a a (coeff-wise) */
template<typename Packet> inline static Packet ei_psqrt(Packet a) { return ei_sqrt(a); }
/***************************************************************************
* The following functions might not have to be overwritten for vectorized types

View File

@@ -40,6 +40,12 @@
* It can be used to let Eigen interface without any overhead with non-Eigen data structures,
* such as plain C arrays or structures from other libraries.
*
* \b Tips: to change the array of data mapped by a Map object, you can use the C++
* placement new syntax:
*
* Example: \include Map_placement_new.cpp
* Output: \verbinclude Map_placement_new.out
*
* This class is the return type of Matrix::Map() but can also be used directly.
*
* \sa Matrix::Map()

View File

@@ -96,7 +96,7 @@ template<typename Derived> class MapBase
return const_cast<Scalar*>(m_data)[row + col * stride()];
}
inline const Scalar coeff(int index) const
inline const Scalar& coeff(int index) const
{
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
if ( ((RowsAtCompileTime == 1) == IsRowMajor) )
@@ -176,7 +176,7 @@ template<typename Derived> class MapBase
{
return Base::operator=(other);
}
using Base::operator*=;
template<typename OtherDerived>

View File

@@ -26,6 +26,7 @@
#define EIGEN_MATHFUNCTIONS_H
template<typename T> inline typename NumTraits<T>::Real precision();
template<typename T> inline typename NumTraits<T>::Real machine_epsilon();
template<typename T> inline T ei_random(T a, T b);
template<typename T> inline T ei_random();
template<typename T> inline T ei_random_amplitude()
@@ -34,10 +35,11 @@ template<typename T> inline T ei_random_amplitude()
else return static_cast<T>(10);
}
template<typename T> inline T ei_hypot(T x, T y)
template<typename T> inline typename NumTraits<T>::Real ei_hypot(T x, T y)
{
T _x = ei_abs(x);
T _y = ei_abs(y);
typedef typename NumTraits<T>::Real RealScalar;
RealScalar _x = ei_abs(x);
RealScalar _y = ei_abs(y);
T p = std::max(_x, _y);
T q = std::min(_x, _y);
T qp = q/p;
@@ -49,6 +51,7 @@ template<typename T> inline T ei_hypot(T x, T y)
**************/
template<> inline int precision<int>() { return 0; }
template<> inline int machine_epsilon<int>() { return 0; }
inline int ei_real(int x) { return x; }
inline int ei_imag(int) { return 0; }
inline int ei_conj(int x) { return x; }
@@ -59,12 +62,20 @@ inline int ei_exp(int) { ei_assert(false); return 0; }
inline int ei_log(int) { ei_assert(false); return 0; }
inline int ei_sin(int) { ei_assert(false); return 0; }
inline int ei_cos(int) { ei_assert(false); return 0; }
#if EIGEN_GNUC_AT_LEAST(4,3)
inline int ei_pow(int x, int y) { return int(std::pow(x, y)); }
#else
inline int ei_pow(int x, int y) { return int(std::pow(double(x), y)); }
#endif
inline int ei_pow(int x, int y)
{
int res = 1;
if(y < 0) return 0;
if(y & 1) res *= x;
y >>= 1;
while(y)
{
x *= x;
if(y&1) res *= x;
y >>= 1;
}
return res;
}
template<> inline int ei_random(int a, int b)
{
@@ -93,6 +104,7 @@ inline bool ei_isApproxOrLessThan(int a, int b, int = precision<int>())
**************/
template<> inline float precision<float>() { return 1e-5f; }
template<> inline float machine_epsilon<float>() { return 1.192e-07f; }
inline float ei_real(float x) { return x; }
inline float ei_imag(float) { return 0.f; }
inline float ei_conj(float x) { return x; }
@@ -138,6 +150,8 @@ inline bool ei_isApproxOrLessThan(float a, float b, float prec = precision<float
**************/
template<> inline double precision<double>() { return 1e-11; }
template<> inline double machine_epsilon<double>() { return 2.220e-16; }
inline double ei_real(double x) { return x; }
inline double ei_imag(double) { return 0.; }
inline double ei_conj(double x) { return x; }
@@ -183,6 +197,7 @@ inline bool ei_isApproxOrLessThan(double a, double b, double prec = precision<do
*********************/
template<> inline float precision<std::complex<float> >() { return precision<float>(); }
template<> inline float machine_epsilon<std::complex<float> >() { return machine_epsilon<float>(); }
inline float ei_real(const std::complex<float>& x) { return std::real(x); }
inline float ei_imag(const std::complex<float>& x) { return std::imag(x); }
inline std::complex<float> ei_conj(const std::complex<float>& x) { return std::conj(x); }
@@ -216,6 +231,7 @@ inline bool ei_isApprox(const std::complex<float>& a, const std::complex<float>&
**********************/
template<> inline double precision<std::complex<double> >() { return precision<double>(); }
template<> inline double machine_epsilon<std::complex<double> >() { return machine_epsilon<double>(); }
inline double ei_real(const std::complex<double>& x) { return std::real(x); }
inline double ei_imag(const std::complex<double>& x) { return std::imag(x); }
inline std::complex<double> ei_conj(const std::complex<double>& x) { return std::conj(x); }
@@ -250,6 +266,7 @@ inline bool ei_isApprox(const std::complex<double>& a, const std::complex<double
******************/
template<> inline long double precision<long double>() { return precision<double>(); }
template<> inline long double machine_epsilon<long double>() { return 1.084e-19l; }
inline long double ei_real(long double x) { return x; }
inline long double ei_imag(long double) { return 0.; }
inline long double ei_conj(long double x) { return x; }

View File

@@ -134,7 +134,7 @@ class Matrix
ei_matrix_storage<Scalar, MaxSizeAtCompileTime, RowsAtCompileTime, ColsAtCompileTime, Options> m_storage;
public:
enum { NeedsToAlign = (Options&AutoAlign) == AutoAlign
enum { NeedsToAlign = (!(Options&DontAlign))
&& SizeAtCompileTime!=Dynamic && ((sizeof(Scalar)*SizeAtCompileTime)%16)==0 };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
@@ -246,6 +246,29 @@ class Matrix
m_storage.resize(size, size, 1);
}
/** Resizes *this to have the same dimensions as \a other.
* Takes care of doing all the checking that's needed.
*
* Note that copying a row-vector into a vector (and conversely) is allowed.
* The resizing, if any, is then done in the appropriate way so that row-vectors
* remain row-vectors and vectors remain vectors.
*/
template<typename OtherDerived>
EIGEN_STRONG_INLINE void resizeLike(const MatrixBase<OtherDerived>& other)
{
if(RowsAtCompileTime == 1)
{
ei_assert(other.isVector());
resize(1, other.size());
}
else if(ColsAtCompileTime == 1)
{
ei_assert(other.isVector());
resize(other.size(), 1);
}
else resize(other.rows(), other.cols());
}
/** Copies the value of the expression \a other into \c *this with automatic resizing.
*
* *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
@@ -266,9 +289,13 @@ class Matrix
*/
EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
{
return _set(other);
return _set(other);
}
template<typename OtherDerived,typename OtherEvalType>
EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived,OtherEvalType>& func)
{ return Base::operator=(func); }
EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Matrix, +=)
EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Matrix, -=)
EIGEN_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Matrix, *=)
@@ -311,47 +338,24 @@ class Matrix
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
}
/** This constructor has two very different behaviors, depending on the type of *this.
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename T0, typename T1>
EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
{
_check_template_params();
_init2<T0,T1>(x, y);
}
#else
/** constructs an uninitialized matrix with \a rows rows and \a cols columns.
*
* \li When Matrix is a fixed-size vector type of size 2, this constructor constructs
* an initialized vector. The parameters \a x, \a y are copied into the first and second
* coords of the vector respectively.
* \li Otherwise, this constructor constructs an uninitialized matrix with \a x rows and
* \a y columns. This is useful for dynamic-size matrices. For fixed-size matrices,
* it is redundant to pass these parameters, so one should use the default constructor
* Matrix() instead.
*/
EIGEN_STRONG_INLINE Matrix(int x, int y) : m_storage(x*y, x, y)
{
_check_template_params();
if((RowsAtCompileTime == 1 && ColsAtCompileTime == 2)
|| (RowsAtCompileTime == 2 && ColsAtCompileTime == 1))
{
m_storage.data()[0] = Scalar(x);
m_storage.data()[1] = Scalar(y);
}
else
{
ei_assert(x > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == x)
&& y > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == y));
}
}
* This is useful for dynamic-size matrices. For fixed-size matrices,
* it is redundant to pass these parameters, so one should use the default constructor
* Matrix() instead. */
Matrix(int rows, int cols);
/** constructs an initialized 2D vector with given coefficients */
EIGEN_STRONG_INLINE Matrix(const float& x, const float& y)
{
_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 2)
m_storage.data()[0] = x;
m_storage.data()[1] = y;
}
/** constructs an initialized 2D vector with given coefficients */
EIGEN_STRONG_INLINE Matrix(const double& x, const double& y)
{
_check_template_params();
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 2)
m_storage.data()[0] = x;
m_storage.data()[1] = y;
}
Matrix(const Scalar& x, const Scalar& y);
#endif
/** constructs an initialized 3D vector with given coefficients */
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
{
@@ -389,6 +393,10 @@ class Matrix
_check_template_params();
_set_noalias(other);
}
/** Copy constructor with in-place evaluation */
template<typename OtherDerived,typename OtherEvalType>
EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived,OtherEvalType>& other)
{ other.evalTo(*this); }
/** Destructor */
inline ~Matrix() {}
@@ -480,17 +488,12 @@ class Matrix
template<typename OtherDerived>
EIGEN_STRONG_INLINE void _resize_to_match(const MatrixBase<OtherDerived>& other)
{
if(RowsAtCompileTime == 1)
{
ei_assert(other.isVector());
resize(1, other.size());
}
else if(ColsAtCompileTime == 1)
{
ei_assert(other.isVector());
resize(other.size(), 1);
}
else resize(other.rows(), other.cols());
#ifdef EIGEN_NO_AUTOMATIC_RESIZING
ei_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
: (rows() == other.rows() && cols() == other.cols())))
&& "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
#endif
resizeLike(other);
}
/** \internal Copies the value of the expression \a other into \c *this with automatic resizing.
@@ -527,13 +530,28 @@ class Matrix
static EIGEN_STRONG_INLINE void _check_template_params()
{
EIGEN_STATIC_ASSERT((_Rows > 0
&& _Cols > 0
&& _MaxRows <= _Rows
&& _MaxCols <= _Cols
&& (_Options & (AutoAlign|RowMajor)) == _Options),
EIGEN_STATIC_ASSERT(((_MaxRows >= _Rows || _Rows==Dynamic)
&& (_MaxCols >= _Cols || _Cols==Dynamic)
&& ((_MaxRows==Dynamic?1:_MaxRows)*(_MaxCols==Dynamic?1:_MaxCols)<Dynamic)
&& (_Options & (DontAlign|RowMajor)) == _Options),
INVALID_MATRIX_TEMPLATE_PARAMETERS)
}
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(int rows, int cols, typename ei_enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
{
ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
&& cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
m_storage.resize(rows*cols,rows,cols);
}
template<typename T0, typename T1>
EIGEN_STRONG_INLINE void _init2(const Scalar& x, const Scalar& y, typename ei_enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 2)
m_storage.data()[0] = x;
m_storage.data()[1] = y;
}
};
/** \defgroup matrixtypedefs Global matrix typedefs

View File

@@ -34,7 +34,7 @@
* types. Most of the Eigen API is contained in this class. Other important classes for
* the Eigen API are Matrix, Cwise, and PartialRedux.
*
* Note that some methods are defined in the \ref Array module.
* Note that some methods are defined in the \ref Array_Module array module.
*
* \param Derived is the derived type, e.g. a matrix type, or an expression, etc.
*
@@ -53,10 +53,17 @@
*
*/
template<typename Derived> class MatrixBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
: public ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>
#endif // not EIGEN_PARSED_BY_DOXYGEN
{
public:
#ifndef EIGEN_PARSED_BY_DOXYGEN
using ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>::operator*;
class InnerIterator;
typedef typename ei_traits<Derived>::Scalar Scalar;
@@ -160,12 +167,15 @@ template<typename Derived> class MatrixBase
inline int rows() const { return derived().rows(); }
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
inline int cols() const { return derived().cols(); }
/** \returns the number of coefficients, which is \a rows()*cols().
/** \returns the number of coefficients, which is rows()*cols().
* \sa rows(), cols(), SizeAtCompileTime. */
inline int size() const { return rows() * cols(); }
/** \returns the size of the main diagonal, which is min(rows(),cols()).
* \sa rows(), cols(), SizeAtCompileTime. */
inline int diagonalSize() const { return std::min(rows(),cols()); }
/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
inline int nonZeros() const { return derived.nonZeros(); }
inline int nonZeros() const { return derived().nonZeros(); }
/** \returns true if either the number of rows or the number of columns is equal to 1.
* In other words, this function returns
* \code rows()==1 || cols()==1 \endcode
@@ -192,6 +202,10 @@ template<typename Derived> class MatrixBase
*/
typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType_ColMajor;
/** \internal the return type of coeff()
*/
typedef typename ei_meta_if<bool(int(Flags)&DirectAccessBit), const Scalar&, Scalar>::ret CoeffReturnType;
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
/** \internal Represents a scalar multiple of a matrix */
@@ -257,15 +271,15 @@ template<typename Derived> class MatrixBase
template<typename OtherDerived>
CommaInitializer<Derived> operator<< (const MatrixBase<OtherDerived>& other);
const Scalar coeff(int row, int col) const;
const Scalar operator()(int row, int col) const;
const CoeffReturnType coeff(int row, int col) const;
const CoeffReturnType operator()(int row, int col) const;
Scalar& coeffRef(int row, int col);
Scalar& operator()(int row, int col);
const Scalar coeff(int index) const;
const Scalar operator[](int index) const;
const Scalar operator()(int index) const;
const CoeffReturnType coeff(int index) const;
const CoeffReturnType operator[](int index) const;
const CoeffReturnType operator()(int index) const;
Scalar& coeffRef(int index);
Scalar& operator[](int index);
@@ -292,10 +306,10 @@ template<typename Derived> class MatrixBase
template<int StoreMode>
void writePacket(int index, const PacketScalar& x);
const Scalar x() const;
const Scalar y() const;
const Scalar z() const;
const Scalar w() const;
const CoeffReturnType x() const;
const CoeffReturnType y() const;
const CoeffReturnType z() const;
const CoeffReturnType w() const;
Scalar& x();
Scalar& y();
Scalar& z();
@@ -324,6 +338,9 @@ template<typename Derived> class MatrixBase
Derived& operator/=(const Scalar& other);
const ScalarMultipleReturnType operator*(const Scalar& scalar) const;
#ifdef EIGEN_PARSED_BY_DOXYGEN
const ScalarMultipleReturnType operator*(const RealScalar& scalar) const;
#endif
const CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>, Derived>
operator/(const Scalar& scalar) const;
@@ -351,6 +368,7 @@ template<typename Derived> class MatrixBase
Scalar dot(const MatrixBase<OtherDerived>& other) const;
RealScalar squaredNorm() const;
RealScalar norm() const;
RealScalar stableNorm() const;
const PlainMatrixType normalized() const;
void normalize();
@@ -358,7 +376,7 @@ template<typename Derived> class MatrixBase
const Eigen::Transpose<Derived> transpose() const;
void transposeInPlace();
const AdjointReturnType adjoint() const;
void adjointInPlace();
RowXpr row(int i);
const RowXpr row(int i) const;
@@ -404,9 +422,15 @@ template<typename Derived> class MatrixBase
template<int Size> typename BlockReturnType<Derived,Size>::SubVectorType segment(int start);
template<int Size> const typename BlockReturnType<Derived,Size>::SubVectorType segment(int start) const;
DiagonalCoeffs<Derived> diagonal();
const DiagonalCoeffs<Derived> diagonal() const;
Diagonal<Derived,0> diagonal();
const Diagonal<Derived,0> diagonal() const;
template<int Index> Diagonal<Derived,Index> diagonal();
template<int Index> const Diagonal<Derived,Index> diagonal() const;
Diagonal<Derived, Dynamic> diagonal(int index);
const Diagonal<Derived, Dynamic> diagonal(int index) const;
template<unsigned int Mode> Part<Derived, Mode> part();
template<unsigned int Mode> const Part<Derived, Mode> part() const;
@@ -443,7 +467,7 @@ template<typename Derived> class MatrixBase
static const BasisReturnType UnitZ();
static const BasisReturnType UnitW();
const DiagonalMatrix<Derived> asDiagonal() const;
const DiagonalMatrixWrapper<Derived> asDiagonal() const;
void fill(const Scalar& value);
Derived& setConstant(const Scalar& value);
@@ -487,8 +511,12 @@ template<typename Derived> class MatrixBase
template<typename NewType>
const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived> cast() const;
typename ei_cast_return_type<
Derived,
const CwiseUnaryOp<ei_scalar_cast_op<typename ei_traits<Derived>::Scalar, NewType>, Derived>
>::type
cast() const;
/** \returns the matrix or vector obtained by evaluating this expression.
*
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
@@ -529,6 +557,8 @@ template<typename Derived> class MatrixBase
Scalar sum() const;
Scalar trace() const;
Scalar prod() const;
typename ei_traits<Derived>::Scalar minCoeff() const;
typename ei_traits<Derived>::Scalar maxCoeff() const;
@@ -582,9 +612,18 @@ template<typename Derived> class MatrixBase
template<int p> RealScalar lpNorm() const;
template<int RowFactor, int ColFactor>
const Replicate<Derived,RowFactor,ColFactor> replicate() const;
const Replicate<Derived,Dynamic,Dynamic> replicate(int rowFacor,int colFactor) const;
Eigen::Reverse<Derived, BothDirections> reverse();
const Eigen::Reverse<Derived, BothDirections> reverse() const;
void reverseInPlace();
/////////// LU module ///////////
const LU<PlainMatrixType> lu() const;
const PartialLU<PlainMatrixType> partialLu() const;
const PlainMatrixType inverse() const;
void computeInverse(PlainMatrixType *result) const;
Scalar determinant() const;
@@ -609,8 +648,23 @@ template<typename Derived> class MatrixBase
template<typename OtherDerived>
PlainMatrixType cross(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived>
PlainMatrixType cross3(const MatrixBase<OtherDerived>& other) const;
PlainMatrixType unitOrthogonal(void) const;
Matrix<Scalar,3,1> eulerAngles(int a0, int a1, int a2) const;
const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
enum {
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
};
typedef Block<Derived,
ei_traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
ei_traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> StartMinusOne;
typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>,
NestByValue<StartMinusOne> > HNormalizedReturnType;
const HNormalizedReturnType hnormalized() const;
typedef Homogeneous<Derived,MatrixBase<Derived>::ColsAtCompileTime==1?Vertical:Horizontal> HomogeneousReturnType;
const HomogeneousReturnType homogeneous() const;
/////////// Sparse module ///////////
@@ -621,6 +675,9 @@ template<typename Derived> class MatrixBase
template<typename Derived1, typename Derived2>
Derived& lazyAssign(const SparseProduct<Derived1,Derived2,DenseTimeSparseProduct>& product);
template<typename OtherDerived,typename OtherEvalType>
Derived& operator=(const ReturnByValue<OtherDerived,OtherEvalType>& func);
#ifdef EIGEN_MATRIXBASE_PLUGIN
#include EIGEN_MATRIXBASE_PLUGIN
#endif

View File

@@ -32,7 +32,7 @@ struct ei_constructor_without_unaligned_array_assert {};
* Static array automatically aligned if the total byte size is a multiple of 16 and the matrix options require auto alignment
*/
template <typename T, int Size, int MatrixOptions,
bool Align = (MatrixOptions&AutoAlign) && (((Size*sizeof(T))&0xf)==0)
bool Align = (!(MatrixOptions&DontAlign)) && (((Size*sizeof(T))&0xf)==0)
> struct ei_matrix_array
{
EIGEN_ALIGN_128 T array[Size];
@@ -85,6 +85,21 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class ei_matr
inline T *data() { return m_data.array; }
};
// null matrix
template<typename T, int _Rows, int _Cols, int _Options> class ei_matrix_storage<T, 0, _Rows, _Cols, _Options>
{
public:
inline explicit ei_matrix_storage() {}
inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) {}
inline ei_matrix_storage(int,int,int) {}
inline void swap(ei_matrix_storage& other) {}
inline static int rows(void) {return _Rows;}
inline static int cols(void) {return _Cols;}
inline void resize(int,int,int) {}
inline const T *data() const { return 0; }
inline T *data() { return 0; }
};
// dynamic-size matrix with fixed-size storage
template<typename T, int Size, int _Options> class ei_matrix_storage<T, Size, Dynamic, Dynamic, _Options>
{

View File

@@ -54,7 +54,7 @@ template<typename ExpressionType> class NestByValue
inline int cols() const { return m_expression.cols(); }
inline int stride() const { return m_expression.stride(); }
inline const Scalar coeff(int row, int col) const
inline const CoeffReturnType coeff(int row, int col) const
{
return m_expression.coeff(row, col);
}
@@ -64,7 +64,7 @@ template<typename ExpressionType> class NestByValue
return m_expression.const_cast_derived().coeffRef(row, col);
}
inline const Scalar coeff(int index) const
inline const CoeffReturnType coeff(int index) const
{
return m_expression.coeff(index);
}
@@ -97,6 +97,8 @@ template<typename ExpressionType> class NestByValue
{
m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
}
operator const ExpressionType&() const { return m_expression; }
protected:
const ExpressionType m_expression;

View File

@@ -84,7 +84,7 @@ template<typename Lhs, typename Rhs> struct ei_product_mode
{
enum{
value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal)
value = ei_is_diagonal<Rhs>::ret || ei_is_diagonal<Lhs>::ret
? DiagonalProduct
: Lhs::MaxColsAtCompileTime == Dynamic
&& ( Lhs::MaxRowsAtCompileTime == Dynamic

View File

@@ -26,48 +26,155 @@
#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H
template<typename BinaryOp, typename Derived, int Start, int Length>
struct ei_redux_impl
// TODO
// * implement other kind of vectorization
// * factorize code
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
template<typename Func, typename Derived>
struct ei_redux_traits
{
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
InnerMaxSize = int(Derived::Flags)&RowMajorBit
? Derived::MaxColsAtCompileTime
: Derived::MaxRowsAtCompileTime
};
enum {
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
&& (ei_functor_traits<Func>::PacketAccess),
MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
};
public:
enum {
Vectorization = int(MayLinearVectorize) ? int(LinearVectorization)
: int(MaySliceVectorize) ? int(SliceVectorization)
: int(NoVectorization)
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
};
public:
enum {
Unrolling = Cost <= UnrollingLimit
? CompleteUnrolling
: NoUnrolling
};
};
/***************************************************************************
* Part 2 : unrollers
***************************************************************************/
/*** no vectorization ***/
template<typename Func, typename Derived, int Start, int Length>
struct ei_redux_novec_unroller
{
enum {
HalfLength = Length/2
};
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived &mat, const BinaryOp& func)
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
{
return func(
ei_redux_impl<BinaryOp, Derived, Start, HalfLength>::run(mat, func),
ei_redux_impl<BinaryOp, Derived, Start+HalfLength, Length - HalfLength>::run(mat, func));
return func(ei_redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
ei_redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
}
};
template<typename BinaryOp, typename Derived, int Start>
struct ei_redux_impl<BinaryOp, Derived, Start, 1>
template<typename Func, typename Derived, int Start>
struct ei_redux_novec_unroller<Func, Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived &mat, const BinaryOp &)
EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
{
return mat.coeff(row, col);
}
};
template<typename BinaryOp, typename Derived, int Start>
struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
/*** vectorization ***/
template<typename Func, typename Derived, int Start, int Length>
struct ei_redux_vec_unroller
{
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
static Scalar run(const Derived& mat, const BinaryOp& func)
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
{
return func.packetOp(
ei_redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
ei_redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
}
};
template<typename Func, typename Derived, int Start>
struct ei_redux_vec_unroller<Func, Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
? index / int(Derived::ColsAtCompileTime)
: index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
? index % int(Derived::ColsAtCompileTime)
: index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
{
return mat.template packet<alignment>(row, col);
}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Func, typename Derived,
int Vectorization = ei_redux_traits<Func, Derived>::Vectorization,
int Unrolling = ei_redux_traits<Func, Derived>::Unrolling
>
struct ei_redux_impl;
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
res = mat.coeff(0,0);
res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res = func(res, mat.coeff(i, 0));
for(int j = 1; j < mat.cols(); ++j)
@@ -77,6 +184,116 @@ struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
}
};
template<typename Func, typename Derived>
struct ei_redux_impl<Func,Derived, NoVectorization, CompleteUnrolling>
: public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat, const Func& func)
{
const int size = mat.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedStart = (Derived::Flags & AlignedBit)
|| !(Derived::Flags & DirectAccessBit)
? 0
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const int alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
res = func.predux(packet_res);
for(int index = 0; index < alignedStart; ++index)
res = func(res,mat.coeff(index));
for(int index = alignedEnd; index < size; ++index)
res = func(res,mat.coeff(index));
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = mat.coeff(0);
for(int index = 1; index < size; ++index)
res = func(res,mat.coeff(index));
}
return res;
}
};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat, const Func& func)
{
const int innerSize = mat.innerSize();
const int outerSize = mat.outerSize();
enum {
packetSize = ei_packet_traits<Scalar>::size,
isRowMajor = Derived::Flags&RowMajorBit?1:0
};
const int packetedInnerSize = ((innerSize)/packetSize)*packetSize;
Scalar res;
if(packetedInnerSize)
{
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
for(int j=0; j<outerSize; ++j)
for(int i=0; i<packetedInnerSize; i+=int(packetSize))
packet_res = func.packetOp(packet_res, mat.template packet<Unaligned>
(isRowMajor?j:i, isRowMajor?i:j));
res = func.predux(packet_res);
for(int j=0; j<outerSize; ++j)
for(int i=packetedInnerSize; i<innerSize; ++i)
res = func(res, mat.coeff(isRowMajor?j:i, isRowMajor?i:j));
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func);
}
return res;
}
};
template<typename Func, typename Derived>
struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
{
Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
if (VectorizationSize != Size)
res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizationSize, Size-VectorizationSize>::run(mat,func));
return res;
}
};
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
*
* The template parameter \a BinaryOp is the type of the functor \a func which must be
@@ -85,21 +302,20 @@ struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
* \sa MatrixBase::sum(), MatrixBase::minCoeff(), MatrixBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
*/
template<typename Derived>
template<typename BinaryOp>
typename ei_result_of<BinaryOp(typename ei_traits<Derived>::Scalar)>::type
MatrixBase<Derived>::redux(const BinaryOp& func) const
template<typename Func>
inline typename ei_result_of<Func(typename ei_traits<Derived>::Scalar)>::type
MatrixBase<Derived>::redux(const Func& func) const
{
const bool unroll = SizeAtCompileTime * CoeffReadCost
+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
<= EIGEN_UNROLLING_LIMIT;
return ei_redux_impl<BinaryOp, Derived, 0, unroll ? int(SizeAtCompileTime) : Dynamic>
::run(derived(), func);
typename Derived::Nested nested(derived());
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
return ei_redux_impl<Func, ThisNested>
::run(nested, func);
}
/** \returns the minimum of all coefficients of *this
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::minCoeff() const
{
return this->redux(Eigen::ei_scalar_min_op<Scalar>());
@@ -108,10 +324,48 @@ MatrixBase<Derived>::minCoeff() const
/** \returns the maximum of all coefficients of *this
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::maxCoeff() const
{
return this->redux(Eigen::ei_scalar_max_op<Scalar>());
}
/** \returns the sum of all coefficients of *this
*
* \sa trace(), prod()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::sum() const
{
return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
}
/** \returns the product of all coefficients of *this
*
* Example: \include MatrixBase_prod.cpp
* Output: \verbinclude MatrixBase_prod.out
*
* \sa sum()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::prod() const
{
return this->redux(Eigen::ei_scalar_product_op<Scalar>());
}
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
*
* \c *this can be any matrix, not necessarily square.
*
* \sa diagonal(), sum()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
return diagonal().sum();
}
#endif // EIGEN_REDUX_H

View File

@@ -0,0 +1,73 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_RETURNBYVALUE_H
#define EIGEN_RETURNBYVALUE_H
/** \class ReturnByValue
*
*/
template<typename Functor, typename _Scalar,int _Rows,int _Cols,int _Options,int _MaxRows,int _MaxCols>
struct ei_traits<ReturnByValue<Functor,Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> > >
: public ei_traits<Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> >
{
enum {
Flags = ei_traits<Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> >::Flags | EvalBeforeNestingBit
};
};
template<typename Functor,typename EvalTypeDerived,int n>
struct ei_nested<ReturnByValue<Functor,MatrixBase<EvalTypeDerived> >, n, EvalTypeDerived>
{
typedef EvalTypeDerived type;
};
template<typename Functor, typename EvalType> class ReturnByValue
{
public:
template<typename Dest>
inline void evalTo(Dest& dst) const
{ static_cast<const Functor*>(this)->evalTo(dst); }
};
template<typename Functor, typename _Scalar,int _Rows,int _Cols,int _Options,int _MaxRows,int _MaxCols>
class ReturnByValue<Functor,Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> >
: public MatrixBase<ReturnByValue<Functor,Matrix<_Scalar,_Rows,_Cols,_Options,_MaxRows,_MaxCols> > >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(ReturnByValue)
template<typename Dest>
inline void evalTo(Dest& dst) const
{ static_cast<const Functor* const>(this)->evalTo(dst); }
};
template<typename Derived>
template<typename OtherDerived,typename OtherEvalType>
Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived,OtherEvalType>& other)
{
other.evalTo(derived());
return derived();
}
#endif // EIGEN_RETURNBYVALUE_H

View File

@@ -33,8 +33,8 @@ template<typename Lhs, typename Rhs,
? LowerTriangular
: (int(Lhs::Flags) & UpperTriangularBit)
? UpperTriangular
: -1,
int StorageOrder = ei_is_part<Lhs>::value ? -1 // this is to solve ambiguous specializations
: 0xffffff,
int StorageOrder = ei_is_part<Lhs>::value ? 0xffffff // this is to solve ambiguous specializations
: int(Lhs::Flags) & (RowMajorBit|SparseBit)
>
struct ei_solve_triangular_selector;

View File

@@ -1,271 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_SUM_H
#define EIGEN_SUM_H
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
template<typename Derived>
struct ei_sum_traits
{
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
public:
enum {
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
&& (int(Derived::Flags)&LinearAccessBit)
? LinearVectorization
: NoVectorization
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
};
public:
enum {
Unrolling = Cost <= UnrollingLimit
? CompleteUnrolling
: NoUnrolling
};
};
/***************************************************************************
* Part 2 : unrollers
***************************************************************************/
/*** no vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_sum_novec_unroller
{
enum {
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return ei_sum_novec_unroller<Derived, Start, HalfLength>::run(mat)
+ ei_sum_novec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat);
}
};
template<typename Derived, int Start>
struct ei_sum_novec_unroller<Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return mat.coeff(row, col);
}
};
/*** vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_sum_vec_unroller
{
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return ei_padd(
ei_sum_vec_unroller<Derived, Start, HalfLength>::run(mat),
ei_sum_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
}
};
template<typename Derived, int Start>
struct ei_sum_vec_unroller<Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
? index / int(Derived::ColsAtCompileTime)
: index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
? index % int(Derived::ColsAtCompileTime)
: index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return mat.template packet<alignment>(row, col);
}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Derived,
int Vectorization = ei_sum_traits<Derived>::Vectorization,
int Unrolling = ei_sum_traits<Derived>::Unrolling
>
struct ei_sum_impl;
template<typename Derived>
struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res += mat.coeff(i, 0);
for(int j = 1; j < mat.cols(); ++j)
for(int i = 0; i < mat.rows(); ++i)
res += mat.coeff(i, j);
return res;
}
};
template<typename Derived>
struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling>
: public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Derived>
struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat)
{
const int size = mat.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedStart = (Derived::Flags & AlignedBit)
|| !(Derived::Flags & DirectAccessBit)
? 0
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const int alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = ei_padd(packet_res, mat.template packet<alignment>(index));
res = ei_predux(packet_res);
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = Scalar(0);
}
for(int index = 0; index < alignedStart; ++index)
res += mat.coeff(index);
for(int index = alignedEnd; index < size; ++index)
res += mat.coeff(index);
return res;
}
};
template<typename Derived>
struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived& mat)
{
Scalar res = ei_predux(ei_sum_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
if (VectorizationSize != Size)
res += ei_sum_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
return res;
}
};
/***************************************************************************
* Part 4 : implementation of MatrixBase methods
***************************************************************************/
/** \returns the sum of all coefficients of *this
*
* \sa trace()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::sum() const
{
return ei_sum_impl<Derived>::run(derived());
}
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
*
* \c *this can be any matrix, not necessarily square.
*
* \sa diagonal(), sum()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
return diagonal().sum();
}
#endif // EIGEN_SUM_H

View File

@@ -77,21 +77,21 @@ template<typename MatrixType> class Transpose
return m_matrix.const_cast_derived().coeffRef(col, row);
}
inline const Scalar coeff(int row, int col) const
inline Scalar& coeffRef(int index)
{
return m_matrix.const_cast_derived().coeffRef(index);
}
inline const CoeffReturnType coeff(int row, int col) const
{
return m_matrix.coeff(col, row);
}
inline const Scalar coeff(int index) const
inline const CoeffReturnType coeff(int index) const
{
return m_matrix.coeff(index);
}
inline Scalar& coeffRef(int index)
{
return m_matrix.const_cast_derived().coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(int row, int col) const
{
@@ -125,7 +125,20 @@ template<typename MatrixType> class Transpose
* Example: \include MatrixBase_transpose.cpp
* Output: \verbinclude MatrixBase_transpose.out
*
* \sa adjoint(), class DiagonalCoeffs */
* \warning If you want to replace a matrix by its own transpose, do \b NOT do this:
* \code
* m = m.transpose(); // bug!!! caused by aliasing effect
* \endcode
* Instead, use the transposeInPlace() method:
* \code
* m.transposeInPlace();
* \endcode
* which gives Eigen good opportunities for optimization, or alternatively you can also do:
* \code
* m = m.transpose().eval();
* \endcode
*
* \sa transposeInPlace(), adjoint() */
template<typename Derived>
inline Transpose<Derived>
MatrixBase<Derived>::transpose()
@@ -133,7 +146,11 @@ MatrixBase<Derived>::transpose()
return derived();
}
/** This is the const version of transpose(). \sa adjoint() */
/** This is the const version of transpose().
*
* Make sure you read the warning for transpose() !
*
* \sa transposeInPlace(), adjoint() */
template<typename Derived>
inline const Transpose<Derived>
MatrixBase<Derived>::transpose() const
@@ -146,7 +163,20 @@ MatrixBase<Derived>::transpose() const
* Example: \include MatrixBase_adjoint.cpp
* Output: \verbinclude MatrixBase_adjoint.out
*
* \sa transpose(), conjugate(), class Transpose, class ei_scalar_conjugate_op */
* \warning If you want to replace a matrix by its own adjoint, do \b NOT do this:
* \code
* m = m.adjoint(); // bug!!! caused by aliasing effect
* \endcode
* Instead, use the adjointInPlace() method:
* \code
* m.adjointInPlace();
* \endcode
* which gives Eigen good opportunities for optimization, or alternatively you can also do:
* \code
* m = m.adjoint().eval();
* \endcode
*
* \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class ei_scalar_conjugate_op */
template<typename Derived>
inline const typename MatrixBase<Derived>::AdjointReturnType
MatrixBase<Derived>::adjoint() const
@@ -179,24 +209,56 @@ struct ei_inplace_transpose_selector<MatrixType,false> { // non square matrix
}
};
/** This is the "in place" version of transpose: it transposes \c *this.
/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
* Thus, doing
* \code
* m.transposeInPlace();
* \endcode
* has the same effect on m as doing
* \code
* m = m.transpose().eval();
* \endcode
* and is faster and also safer because in the latter line of code, forgetting the eval() results
* in a bug caused by aliasing.
*
* In most cases it is probably better to simply use the transposed expression
* of a matrix. However, when transposing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional features:
* - less error prone: doing the same operation with .transpose() requires special care:
* \code m = m.transpose().eval(); \endcode
* - no temporary object is created (currently only for squared matrices)
* - it allows future optimizations (cache friendliness, etc.)
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
* If you just need the transpose of a matrix, use transpose().
*
* \note if the matrix is not square, then \c *this must be a resizable matrix.
*
* \sa transpose(), adjoint() */
* \sa transpose(), adjoint(), adjointInPlace() */
template<typename Derived>
inline void MatrixBase<Derived>::transposeInPlace()
{
ei_inplace_transpose_selector<Derived>::run(derived());
}
/***************************************************************************
* "in place" adjoint implementation
***************************************************************************/
/** This is the "in place" version of adjoint(): it replaces \c *this by its own transpose.
* Thus, doing
* \code
* m.adjointInPlace();
* \endcode
* has the same effect on m as doing
* \code
* m = m.adjoint().eval();
* \endcode
* and is faster and also safer because in the latter line of code, forgetting the eval() results
* in a bug caused by aliasing.
*
* Notice however that this method is only useful if you want to replace a matrix by its own adjoint.
* If you just need the adjoint of a matrix, use adjoint().
*
* \note if the matrix is not square, then \c *this must be a resizable matrix.
*
* \sa transpose(), adjoint(), transposeInPlace() */
template<typename Derived>
inline void MatrixBase<Derived>::adjointInPlace()
{
derived() = adjoint().eval();
}
#endif // EIGEN_TRANSPOSE_H

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_AltiVec_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec
)
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel
)

View File

@@ -45,8 +45,10 @@ typedef __vector __bool int v4bi;
#define USE_CONST_v1i_ const v4ui v1i_ = vec_splat_u32(-1)
#define USE_CONST_v0f_ USE_CONST_v1i_; const v4f v0f_ = (v4f) vec_sl(v1i_, v1i_)
template<> struct ei_packet_traits<float> { typedef v4f type; enum {size=4}; };
template<> struct ei_packet_traits<int> { typedef v4i type; enum {size=4}; };
template<> struct ei_packet_traits<float> : ei_default_packet_traits
{ typedef v4f type; enum {size=4}; };
template<> struct ei_packet_traits<int> : ei_default_packet_traits
{ typedef v4i type; enum {size=4}; };
template<> struct ei_unpacket_traits<v4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<v4i> { typedef int type; enum {size=4}; };
@@ -101,6 +103,18 @@ template<> inline v4i ei_padd(const v4i& a, const v4i& b) { return vec_add(
template<> inline v4f ei_psub(const v4f& a, const v4f& b) { return vec_sub(a,b); }
template<> inline v4i ei_psub(const v4i& a, const v4i& b) { return vec_sub(a,b); }
template<> EIGEN_STRONG_INLINE v4f ei_pnegate(const v4f& a)
{
v4i mask = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
return vec_xor(a,(v4f) mask);
}
template<> EIGEN_STRONG_INLINE v4i ei_pnegate(const v4i& a)
{
USE_CONST_v0i;
return ei_psub(v0i, a);
}
template<> inline v4f ei_pmul(const v4f& a, const v4f& b) { USE_CONST_v0f; return vec_madd(a,b, v0f); }
template<> inline v4i ei_pmul(const v4i& a, const v4i& b)
{
@@ -111,7 +125,7 @@ template<> inline v4i ei_pmul(const v4i& a, const v4i& b)
USE_CONST_v1i;
USE_CONST_v16i_;
// Get the absolute values
// Get the absolute values
a1 = vec_abs(a);
b1 = vec_abs(b);
@@ -146,7 +160,7 @@ template<> inline v4f ei_pdiv(const v4f& a, const v4f& b) {
// Altivec does not offer a divide instruction, we have to do a reciprocal approximation
y_0 = vec_re(b);
// Do one Newton-Raphson iteration to get the needed accuracy
t = vec_nmsub(y_0, b, v1f);
y_1 = vec_madd(y_0, t, y_0);
@@ -163,6 +177,9 @@ template<> inline v4i ei_pmin(const v4i& a, const v4i& b) { return vec_min(
template<> inline v4f ei_pmax(const v4f& a, const v4f& b) { return vec_max(a,b); }
template<> inline v4i ei_pmax(const v4i& a, const v4i& b) { return vec_max(a,b); }
template<> EIGEN_STRONG_INLINE v4f ei_pabs(const v4f& a) { return vec_abs(a); }
template<> EIGEN_STRONG_INLINE v4i ei_pabs(const v4i& a) { return vec_abs(a); }
template<> inline v4f ei_pload(const float* from) { return vec_ld(0, from); }
template<> inline v4i ei_pload(const int* from) { return vec_ld(0, from); }
@@ -177,7 +194,7 @@ template<> inline v4f ei_ploadu(const float* from)
return (v4f) vec_perm(MSQ, LSQ, mask); // align the data
}
template<> inline v4i ei_ploadu(const int* from)
template<> inline v4i ei_ploadu(const int* from)
{
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
__vector unsigned char MSQ, LSQ;
@@ -198,7 +215,7 @@ template<> inline v4f ei_pset1(const float& from)
return vc;
}
template<> inline v4i ei_pset1(const int& from)
template<> inline v4i ei_pset1(const int& from)
{
int __attribute__(aligned(16)) ai[4];
ai[0] = from;
@@ -248,18 +265,31 @@ template<> inline void ei_pstoreu(int* to , const v4i& from )
template<> inline float ei_pfirst(const v4f& a)
{
float __attribute__(aligned(16)) af[4];
float EIGEN_ALIGN_128 af[4];
vec_st(a, 0, af);
return af[0];
}
template<> inline int ei_pfirst(const v4i& a)
{
int __attribute__(aligned(16)) ai[4];
int EIGEN_ALIGN_128 ai[4];
vec_st(a, 0, ai);
return ai[0];
}
template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a)
{
static const __vector unsigned char reverse_mask =
{12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
return (v4f)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
}
template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a)
{
static const __vector unsigned char __attribute__(aligned(16)) reverse_mask =
{12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
return (v4i)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
}
inline v4f ei_preduxp(const v4f* vecs)
{
v4f v[4], sum[4];
@@ -287,6 +317,33 @@ inline v4f ei_preduxp(const v4f* vecs)
return sum[0];
}
inline v4i ei_preduxp(const v4i* vecs)
{
v4i v[4], sum[4];
// It's easier and faster to transpose then add as columns
// Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
// Do the transpose, first set of moves
v[0] = vec_mergeh(vecs[0], vecs[2]);
v[1] = vec_mergel(vecs[0], vecs[2]);
v[2] = vec_mergeh(vecs[1], vecs[3]);
v[3] = vec_mergel(vecs[1], vecs[3]);
// Get the resulting vectors
sum[0] = vec_mergeh(v[0], v[2]);
sum[1] = vec_mergel(v[0], v[2]);
sum[2] = vec_mergeh(v[1], v[3]);
sum[3] = vec_mergel(v[1], v[3]);
// Now do the summation:
// Lines 0+1
sum[0] = vec_add(sum[0], sum[1]);
// Lines 2+3
sum[1] = vec_add(sum[2], sum[3]);
// Add the results
sum[0] = vec_add(sum[0], sum[1]);
return sum[0];
}
inline float ei_predux(const v4f& a)
{
v4f b, sum;
@@ -297,33 +354,6 @@ inline float ei_predux(const v4f& a)
return ei_pfirst(sum);
}
inline v4i ei_preduxp(const v4i* vecs)
{
v4i v[4], sum[4];
// It's easier and faster to transpose then add as columns
// Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
// Do the transpose, first set of moves
v[0] = vec_mergeh(vecs[0], vecs[2]);
v[1] = vec_mergel(vecs[0], vecs[2]);
v[2] = vec_mergeh(vecs[1], vecs[3]);
v[3] = vec_mergel(vecs[1], vecs[3]);
// Get the resulting vectors
sum[0] = vec_mergeh(v[0], v[2]);
sum[1] = vec_mergel(v[0], v[2]);
sum[2] = vec_mergeh(v[1], v[3]);
sum[3] = vec_mergel(v[1], v[3]);
// Now do the summation:
// Lines 0+1
sum[0] = vec_add(sum[0], sum[1]);
// Lines 2+3
sum[1] = vec_add(sum[2], sum[3]);
// Add the results
sum[0] = vec_add(sum[0], sum[1]);
return sum[0];
}
inline int ei_predux(const v4i& a)
{
USE_CONST_v0i;
@@ -333,6 +363,55 @@ inline int ei_predux(const v4i& a)
return ei_pfirst(sum);
}
// implement other reductions operators
inline float ei_predux_mul(const v4f& a)
{
v4f prod;
prod = ei_pmul(a, (v4f)vec_sld(a, a, 8));
return ei_pfirst(ei_pmul(prod, (v4f)vec_sld(prod, prod, 4)));
}
inline int ei_predux_mul(const v4i& a)
{
EIGEN_ALIGN_128 int aux[4];
ei_pstore(aux, a);
return aux[0] * aux[1] * aux[2] * aux[3];
}
inline float ei_predux_min(const v4f& a)
{
v4f b, res;
b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4));
return ei_pfirst(res);
}
inline int ei_predux_min(const v4i& a)
{
v4i b, res;
b = vec_min(a, vec_sld(a, a, 8));
res = vec_min(b, vec_sld(b, b, 4));
return ei_pfirst(res);
}
inline float ei_predux_max(const v4f& a)
{
v4f b, res;
b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4));
return ei_pfirst(res);
}
inline int ei_predux_max(const v4i& a)
{
v4i b, res;
b = vec_max(a, vec_sld(a, a, 8));
res = vec_max(b, vec_sld(b, b, 4));
return ei_pfirst(res);
}
template<int Offset>
struct ei_palign_impl<Offset, v4f>
{

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_arch_SSE_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE
)
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel
)

View File

@@ -0,0 +1,376 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2007 Julien Pommier
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
/* The sin, cos, exp, and log functions of this file come from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/
#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
#define EIGEN_MATH_FUNCTIONS_SSE_H
static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_plog(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
Packet4i emm0;
Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
x = ei_pmax(x, ei_p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
/* keep only the fractional part */
x = _mm_and_ps(x, ei_p4f_inv_mant_mask);
x = _mm_or_ps(x, ei_p4f_half);
emm0 = _mm_sub_epi32(emm0, ei_p4i_0x7f);
Packet4f e = ei_padd(_mm_cvtepi32_ps(emm0), ei_p4f_1);
/* part2:
if( x < SQRTHF ) {
e -= 1;
x = x + x - 1.0;
} else { x = x - 1.0; }
*/
Packet4f mask = _mm_cmplt_ps(x, ei_p4f_cephes_SQRTHF);
Packet4f tmp = _mm_and_ps(x, mask);
x = ei_psub(x, ei_p4f_1);
e = ei_psub(e, _mm_and_ps(ei_p4f_1, mask));
x = ei_padd(x, tmp);
Packet4f x2 = ei_pmul(x,x);
Packet4f x3 = ei_pmul(x2,x);
Packet4f y, y1, y2;
y = ei_pmadd(ei_p4f_cephes_log_p0, x, ei_p4f_cephes_log_p1);
y1 = ei_pmadd(ei_p4f_cephes_log_p3, x, ei_p4f_cephes_log_p4);
y2 = ei_pmadd(ei_p4f_cephes_log_p6, x, ei_p4f_cephes_log_p7);
y = ei_pmadd(y , x, ei_p4f_cephes_log_p2);
y1 = ei_pmadd(y1, x, ei_p4f_cephes_log_p5);
y2 = ei_pmadd(y2, x, ei_p4f_cephes_log_p8);
y = ei_pmadd(y, x3, y1);
y = ei_pmadd(y, x3, y2);
y = ei_pmul(y, x3);
y1 = ei_pmul(e, ei_p4f_cephes_log_q1);
tmp = ei_pmul(x2, ei_p4f_half);
y = ei_padd(y, y1);
x = ei_psub(x, tmp);
y2 = ei_pmul(e, ei_p4f_cephes_log_q2);
x = ei_padd(x, y);
x = ei_padd(x, y2);
return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
}
static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pexp(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
Packet4f tmp = _mm_setzero_ps(), fx;
Packet4i emm0;
// clamp x
x = ei_pmax(ei_pmin(x, ei_p4f_exp_hi), ei_p4f_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = ei_pmadd(x, ei_p4f_cephes_LOG2EF, ei_p4f_half);
/* how to perform a floorf with SSE: just below */
emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0);
/* if greater, substract 1 */
Packet4f mask = _mm_cmpgt_ps(tmp, fx);
mask = _mm_and_ps(mask, ei_p4f_1);
fx = ei_psub(tmp, mask);
tmp = ei_pmul(fx, ei_p4f_cephes_exp_C1);
Packet4f z = ei_pmul(fx, ei_p4f_cephes_exp_C2);
x = ei_psub(x, tmp);
x = ei_psub(x, z);
z = ei_pmul(x,x);
Packet4f y = ei_p4f_cephes_exp_p0;
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p1);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p2);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p3);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p4);
y = ei_pmadd(y, x, ei_p4f_cephes_exp_p5);
y = ei_pmadd(y, z, x);
y = ei_padd(y, ei_p4f_1);
/* build 2^n */
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, ei_p4i_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);
return ei_pmul(y, _mm_castsi128_ps(emm0));
}
/* evaluation of 4 sines at onces, using SSE2 intrinsics.
The code is the exact rewriting of the cephes sinf function.
Precision is excellent as long as x < 8192 (I did not bother to
take into account the special handling they have for greater values
-- it does not return garbage for arguments over 8192, though, but
the extra precision is missing).
Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
surprising but correct result.
*/
static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_psin(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
Packet4i emm0, emm2;
sign_bit = x;
/* take the absolute value */
x = ei_pabs(x);
/* take the modulo */
/* extract the sign bit (upper one) */
sign_bit = _mm_and_ps(sign_bit, ei_p4f_sign_mask);
/* scale by 4/Pi */
y = ei_pmul(x, ei_p4f_cephes_FOPI);
/* store the integer part of y in mm0 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, ei_p4i_1);
emm2 = _mm_and_si128(emm2, ei_p4i_not1);
y = _mm_cvtepi32_ps(emm2);
/* get the swap sign flag */
emm0 = _mm_and_si128(emm2, ei_p4i_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask
there is one polynom for 0 <= x <= Pi/4
and another one for Pi/4<x<=Pi/2
Both branches will be computed.
*/
emm2 = _mm_and_si128(emm2, ei_p4i_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
Packet4f poly_mask = _mm_castsi128_ps(emm2);
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1);
xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2);
xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3);
x = ei_padd(x, xmm1);
x = ei_padd(x, xmm2);
x = ei_padd(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = ei_p4f_coscof_p0;
Packet4f z = _mm_mul_ps(x,x);
y = ei_pmadd(y, z, ei_p4f_coscof_p1);
y = ei_pmadd(y, z, ei_p4f_coscof_p2);
y = ei_pmul(y, z);
y = ei_pmul(y, z);
Packet4f tmp = ei_pmul(z, ei_p4f_half);
y = ei_psub(y, tmp);
y = ei_padd(y, ei_p4f_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
Packet4f y2 = ei_p4f_sincof_p0;
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1);
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2);
y2 = ei_pmul(y2, z);
y2 = ei_pmul(y2, x);
y2 = ei_padd(y2, x);
/* select the correct result from the two polynoms */
y2 = _mm_and_ps(poly_mask, y2);
y = _mm_andnot_ps(poly_mask, y);
y = _mm_or_ps(y,y2);
/* update the sign */
return _mm_xor_ps(y, sign_bit);
}
/* almost the same as ei_psin */
static EIGEN_DONT_INLINE EIGEN_UNUSED Packet4f ei_pcos(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
Packet4i emm0, emm2;
x = ei_pabs(x);
/* scale by 4/Pi */
y = ei_pmul(x, ei_p4f_cephes_FOPI);
/* get the integer part of y */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, ei_p4i_1);
emm2 = _mm_and_si128(emm2, ei_p4i_not1);
y = _mm_cvtepi32_ps(emm2);
emm2 = _mm_sub_epi32(emm2, ei_p4i_2);
/* get the swap sign flag */
emm0 = _mm_andnot_si128(emm2, ei_p4i_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask */
emm2 = _mm_and_si128(emm2, ei_p4i_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
Packet4f sign_bit = _mm_castsi128_ps(emm0);
Packet4f poly_mask = _mm_castsi128_ps(emm2);
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = ei_pmul(y, ei_p4f_minus_cephes_DP1);
xmm2 = ei_pmul(y, ei_p4f_minus_cephes_DP2);
xmm3 = ei_pmul(y, ei_p4f_minus_cephes_DP3);
x = ei_padd(x, xmm1);
x = ei_padd(x, xmm2);
x = ei_padd(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = ei_p4f_coscof_p0;
Packet4f z = ei_pmul(x,x);
y = ei_pmadd(y,z,ei_p4f_coscof_p1);
y = ei_pmadd(y,z,ei_p4f_coscof_p2);
y = ei_pmul(y, z);
y = ei_pmul(y, z);
Packet4f tmp = _mm_mul_ps(z, ei_p4f_half);
y = ei_psub(y, tmp);
y = ei_padd(y, ei_p4f_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
Packet4f y2 = ei_p4f_sincof_p0;
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p1);
y2 = ei_pmadd(y2, z, ei_p4f_sincof_p2);
y2 = ei_pmul(y2, z);
y2 = ei_pmadd(y2, x, x);
/* select the correct result from the two polynoms */
y2 = _mm_and_ps(poly_mask, y2);
y = _mm_andnot_ps(poly_mask, y);
y = _mm_or_ps(y,y2);
/* update the sign */
return _mm_xor_ps(y, sign_bit);
}
static EIGEN_UNUSED Packet4f ei_psqrt(Packet4f _x)
{
Packet4f half = ei_pmul(_x, ei_pset1(.5f));
Packet4f x = _mm_rsqrt_ps(_x);
x = ei_pmul(x, ei_psub(ei_pset1(1.5f), ei_pmul(half, ei_pmul(x,x))));
return ei_pmul(_x,x);
}
#endif // EIGEN_MATH_FUNCTIONS_SSE_H

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -29,147 +29,245 @@
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 16
#endif
template<> struct ei_packet_traits<float> { typedef __m128 type; enum {size=4}; };
template<> struct ei_packet_traits<double> { typedef __m128d type; enum {size=2}; };
template<> struct ei_packet_traits<int> { typedef __m128i type; enum {size=4}; };
typedef __m128 Packet4f;
typedef __m128i Packet4i;
typedef __m128d Packet2d;
template<> struct ei_unpacket_traits<__m128> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<__m128d> { typedef double type; enum {size=2}; };
template<> struct ei_unpacket_traits<__m128i> { typedef int type; enum {size=4}; };
#define ei_vec4f_swizzle1(v,p,q,r,s) \
(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
template<> EIGEN_STRONG_INLINE __m128 ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE __m128d ei_pset1<double>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE __m128i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); }
#define ei_vec4i_swizzle1(v,p,q,r,s) \
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
template<> EIGEN_STRONG_INLINE __m128 ei_padd<__m128>(const __m128& a, const __m128& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_padd<__m128d>(const __m128d& a, const __m128d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE __m128i ei_padd<__m128i>(const __m128i& a, const __m128i& b) { return _mm_add_epi32(a,b); }
#define ei_vec4f_swizzle2(a,b,p,q,r,s) \
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
template<> EIGEN_STRONG_INLINE __m128 ei_psub<__m128>(const __m128& a, const __m128& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_psub<__m128d>(const __m128d& a, const __m128d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE __m128i ei_psub<__m128i>(const __m128i& a, const __m128i& b) { return _mm_sub_epi32(a,b); }
#define ei_vec4i_swizzle2(a,b,p,q,r,s) \
(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f ei_p4f_##NAME = ei_pset1<float>(X)
template<> EIGEN_STRONG_INLINE __m128 ei_pmul<__m128>(const __m128& a, const __m128& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_pmul<__m128d>(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE __m128i ei_pmul<__m128i>(const __m128i& a, const __m128i& b)
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
const Packet4f ei_p4f_##NAME = _mm_castsi128_ps(ei_pset1<int>(X))
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i ei_p4i_##NAME = ei_pset1<int>(X)
template<> struct ei_packet_traits<float> : ei_default_packet_traits
{
return _mm_or_si128(
_mm_and_si128(
_mm_mul_epu32(a,b),
_mm_setr_epi32(0xffffffff,0,0xffffffff,0)),
_mm_slli_si128(
_mm_and_si128(
_mm_mul_epu32(_mm_srli_si128(a,4),_mm_srli_si128(b,4)),
_mm_setr_epi32(0xffffffff,0,0xffffffff,0)), 4));
typedef Packet4f type; enum {size=4};
enum {
HasSin = 1,
HasCos = 1,
HasLog = 1,
HasExp = 1,
HasSqrt = 1
};
};
template<> struct ei_packet_traits<double> : ei_default_packet_traits
{ typedef Packet2d type; enum {size=2}; };
template<> struct ei_packet_traits<int> : ei_default_packet_traits
{ typedef Packet4i type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
template<> struct ei_unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a)
{
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
return _mm_xor_ps(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pnegate(const Packet2d& a)
{
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
return _mm_xor_pd(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a)
{
return ei_psub(_mm_setr_epi32(0,0,0,0), a);
}
template<> EIGEN_STRONG_INLINE __m128 ei_pdiv<__m128>(const __m128& a, const __m128& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_pdiv<__m128d>(const __m128d& a, const __m128d& b) { return _mm_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE __m128i ei_pdiv<__m128i>(const __m128i& /*a*/, const __m128i& /*b*/)
template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
// this version is slightly faster than 4 scalar products
return ei_vec4i_swizzle1(
ei_vec4i_swizzle2(
_mm_mul_epu32(a,b),
_mm_mul_epu32(ei_vec4i_swizzle1(a,1,0,3,2),
ei_vec4i_swizzle1(b,1,0,3,2)),
0,2,0,2),
0,2,1,3);
}
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
{ ei_assert(false && "packet integer division are not supported by SSE");
__m128i dummy = ei_pset1<int>(0);
return dummy;
return ei_pset1<int>(0);
}
// for some weird raisons, it has to be overloaded for packet integer
template<> EIGEN_STRONG_INLINE __m128i ei_pmadd(const __m128i& a, const __m128i& b, const __m128i& c) { return ei_padd(ei_pmul(a,b), c); }
// for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); }
template<> EIGEN_STRONG_INLINE __m128 ei_pmin<__m128>(const __m128& a, const __m128& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_pmin<__m128d>(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); }
// FIXME this vectorized min operator is likely to be slower than the standard one
template<> EIGEN_STRONG_INLINE __m128i ei_pmin<__m128i>(const __m128i& a, const __m128i& b)
template<> EIGEN_STRONG_INLINE Packet4f ei_pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
__m128i mask = _mm_cmplt_epi32(a,b);
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
}
template<> EIGEN_STRONG_INLINE __m128 ei_pmax<__m128>(const __m128& a, const __m128& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_pmax<__m128d>(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); }
// FIXME this vectorized max operator is likely to be slower than the standard one
template<> EIGEN_STRONG_INLINE __m128i ei_pmax<__m128i>(const __m128i& a, const __m128i& b)
template<> EIGEN_STRONG_INLINE Packet4f ei_pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
__m128i mask = _mm_cmpgt_epi32(a,b);
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
}
template<> EIGEN_STRONG_INLINE __m128 ei_pload<float>(const float* from) { return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE __m128d ei_pload<double>(const double* from) { return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE __m128i ei_pload<int>(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
template<> EIGEN_STRONG_INLINE __m128 ei_ploadu<float>(const float* from) { return _mm_loadu_ps(from); }
// template<> EIGEN_STRONG_INLINE __m128 ei_ploadu(const float* from) {
// if (size_t(from)&0xF)
// return _mm_loadu_ps(from);
// else
// return _mm_loadu_ps(from);
// }
template<> EIGEN_STRONG_INLINE __m128d ei_ploadu<double>(const double* from) { return _mm_loadu_pd(from); }
template<> EIGEN_STRONG_INLINE __m128i ei_ploadu<int>(const int* from) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const __m128& from) { _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const __m128d& from) { _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const __m128& from) { _mm_storeu_ps(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const __m128d& from) { _mm_storeu_pd(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const __m128i& from) { _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { return _mm_load_ps(from); }
template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { return _mm_load_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) {
Packet4f r;
r = _mm_castpd_ps(_mm_load_sd((double*)(from)));
r = _mm_loadh_pi(r, (const __m64*)(from+2));
return r;
}
template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { return _mm_castps_pd(ei_ploadu((const float*)(from))); }
template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { return _mm_castpd_si128(ei_ploadu((const double*)(from))); }
template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { _mm_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { _mm_store_pd(to, from); }
template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2d& from) {
_mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from);
}
template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { ei_pstoreu((double*)to, _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { ei_pstoreu((double*)to, _mm_castsi128_pd(from)); }
#ifdef _MSC_VER
// this fix internal compilation error
template<> EIGEN_STRONG_INLINE float ei_pfirst<__m128>(const __m128& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<__m128d>(const __m128d& a) { double x = _mm_cvtsd_f64(a); return x; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<__m128i>(const __m128i& a) { int x = _mm_cvtsi128_si32(a); return x; }
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
#else
template<> EIGEN_STRONG_INLINE float ei_pfirst<__m128>(const __m128& a) { return _mm_cvtss_f32(a); }
template<> EIGEN_STRONG_INLINE double ei_pfirst<__m128d>(const __m128d& a) { return _mm_cvtsd_f64(a); }
template<> EIGEN_STRONG_INLINE int ei_pfirst<__m128i>(const __m128i& a) { return _mm_cvtsi128_si32(a); }
template<> EIGEN_STRONG_INLINE float ei_pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
template<> EIGEN_STRONG_INLINE double ei_pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
template<> EIGEN_STRONG_INLINE int ei_pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a)
{ return _mm_shuffle_ps(a,a,0x1B); }
template<> EIGEN_STRONG_INLINE Packet2d ei_preverse(const Packet2d& a)
{ return _mm_shuffle_pd(a,a,0x1); }
template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a)
{ return _mm_shuffle_epi32(a,0x1B); }
template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a)
{
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
return _mm_and_ps(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
{
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
return _mm_and_pd(a,mask);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
{
#ifdef __SSSE3__
return _mm_abs_epi32(a);
#else
Packet4i aux = _mm_srai_epi32(a,31);
return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
#endif
}
#ifdef __SSE3__
// TODO implement SSE2 versions as well as integer versions
template<> EIGEN_STRONG_INLINE __m128 ei_preduxp<__m128>(const __m128* vecs)
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
{
return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
}
template<> EIGEN_STRONG_INLINE __m128d ei_preduxp<__m128d>(const __m128d* vecs)
template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs)
{
return _mm_hadd_pd(vecs[0], vecs[1]);
}
// SSSE3 version:
// EIGEN_STRONG_INLINE __m128i ei_preduxp(const __m128i* vecs)
// EIGEN_STRONG_INLINE Packet4i ei_preduxp(const Packet4i* vecs)
// {
// return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
// }
template<> EIGEN_STRONG_INLINE float ei_predux<__m128>(const __m128& a)
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
{
__m128 tmp0 = _mm_hadd_ps(a,a);
Packet4f tmp0 = _mm_hadd_ps(a,a);
return ei_pfirst(_mm_hadd_ps(tmp0, tmp0));
}
template<> EIGEN_STRONG_INLINE double ei_predux<__m128d>(const __m128d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); }
template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); }
// SSSE3 version:
// EIGEN_STRONG_INLINE float ei_predux(const __m128i& a)
// EIGEN_STRONG_INLINE float ei_predux(const Packet4i& a)
// {
// __m128i tmp0 = _mm_hadd_epi32(a,a);
// Packet4i tmp0 = _mm_hadd_epi32(a,a);
// return ei_pfirst(_mm_hadd_epi32(tmp0, tmp0));
// }
#else
// SSE2 versions
template<> EIGEN_STRONG_INLINE float ei_predux<__m128>(const __m128& a)
template<> EIGEN_STRONG_INLINE float ei_predux<Packet4f>(const Packet4f& a)
{
__m128 tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux<__m128d>(const __m128d& a)
template<> EIGEN_STRONG_INLINE double ei_predux<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE __m128 ei_preduxp<__m128>(const __m128* vecs)
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
{
__m128 tmp0, tmp1, tmp2;
Packet4f tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
@@ -181,21 +279,21 @@ template<> EIGEN_STRONG_INLINE __m128 ei_preduxp<__m128>(const __m128* vecs)
return _mm_add_ps(tmp0, tmp2);
}
template<> EIGEN_STRONG_INLINE __m128d ei_preduxp<__m128d>(const __m128d* vecs)
template<> EIGEN_STRONG_INLINE Packet2d ei_preduxp<Packet2d>(const Packet2d* vecs)
{
return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
}
#endif // SSE3
template<> EIGEN_STRONG_INLINE int ei_predux<__m128i>(const __m128i& a)
template<> EIGEN_STRONG_INLINE int ei_predux<Packet4i>(const Packet4i& a)
{
__m128i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
return ei_pfirst(tmp) + ei_pfirst(_mm_shuffle_epi32(tmp, 1));
}
template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs)
template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp<Packet4i>(const Packet4i* vecs)
{
__m128i tmp0, tmp1, tmp2;
Packet4i tmp0, tmp1, tmp2;
tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
@@ -207,16 +305,80 @@ template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs)
return _mm_add_epi32(tmp0, tmp2);
}
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux_mul<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
{
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., reusing ei_pmul is very slow !)
// TODO try to call _mm_mul_epu32 directly
EIGEN_ALIGN_128 int aux[4];
ei_pstore(aux, a);
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
}
// min
template<> EIGEN_STRONG_INLINE float ei_predux_min<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux_min<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
{
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
EIGEN_ALIGN_128 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
return aux0<aux2 ? aux0 : aux2;
}
// max
template<> EIGEN_STRONG_INLINE float ei_predux_max<Packet4f>(const Packet4f& a)
{
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
return ei_pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
}
template<> EIGEN_STRONG_INLINE double ei_predux_max<Packet2d>(const Packet2d& a)
{
return ei_pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
}
template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
{
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
EIGEN_ALIGN_128 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
return aux0>aux2 ? aux0 : aux2;
}
#if (defined __GNUC__)
// template <> EIGEN_STRONG_INLINE __m128 ei_pmadd(const __m128& a, const __m128& b, const __m128& c)
// template <> EIGEN_STRONG_INLINE Packet4f ei_pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
// {
// __m128 res = b;
// Packet4f res = b;
// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
// return res;
// }
// EIGEN_STRONG_INLINE __m128i _mm_alignr_epi8(const __m128i& a, const __m128i& b, const int i)
// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i& a, const Packet4i& b, const int i)
// {
// __m128i res = a;
// Packet4i res = a;
// asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
// return res;
// }
@@ -225,9 +387,9 @@ template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs)
#ifdef __SSSE3__
// SSSE3 versions
template<int Offset>
struct ei_palign_impl<Offset,__m128>
struct ei_palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(__m128& first, const __m128& second)
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset!=0)
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
@@ -235,9 +397,9 @@ struct ei_palign_impl<Offset,__m128>
};
template<int Offset>
struct ei_palign_impl<Offset,__m128i>
struct ei_palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(__m128i& first, const __m128i& second)
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset!=0)
first = _mm_alignr_epi8(second,first, Offset*4);
@@ -245,9 +407,9 @@ struct ei_palign_impl<Offset,__m128i>
};
template<int Offset>
struct ei_palign_impl<Offset,__m128d>
struct ei_palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(__m128d& first, const __m128d& second)
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
@@ -256,9 +418,9 @@ struct ei_palign_impl<Offset,__m128d>
#else
// SSE2 versions
template<int Offset>
struct ei_palign_impl<Offset,__m128>
struct ei_palign_impl<Offset,Packet4f>
{
EIGEN_STRONG_INLINE static void run(__m128& first, const __m128& second)
EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second)
{
if (Offset==1)
{
@@ -279,9 +441,9 @@ struct ei_palign_impl<Offset,__m128>
};
template<int Offset>
struct ei_palign_impl<Offset,__m128i>
struct ei_palign_impl<Offset,Packet4i>
{
EIGEN_STRONG_INLINE static void run(__m128i& first, const __m128i& second)
EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second)
{
if (Offset==1)
{
@@ -302,9 +464,9 @@ struct ei_palign_impl<Offset,__m128i>
};
template<int Offset>
struct ei_palign_impl<Offset,__m128d>
struct ei_palign_impl<Offset,Packet2d>
{
EIGEN_STRONG_INLINE static void run(__m128d& first, const __m128d& second)
EIGEN_STRONG_INLINE static void run(Packet2d& first, const Packet2d& second)
{
if (Offset==1)
{

View File

@@ -0,0 +1,490 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H
template <int L2MemorySize,typename Scalar>
struct ei_L2_block_traits {
enum {width = 8 * ei_meta_sqrt<L2MemorySize/(64*sizeof(Scalar))>::ret };
};
#ifndef EIGEN_EXTERN_INSTANTIATIONS
template<typename Scalar>
static void ei_cache_friendly_product(
int _rows, int _cols, int depth,
bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride,
bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride,
bool resRowMajor, Scalar* res, int resStride)
{
const Scalar* EIGEN_RESTRICT lhs;
const Scalar* EIGEN_RESTRICT rhs;
int lhsStride, rhsStride, rows, cols;
bool lhsRowMajor;
if (resRowMajor)
{
lhs = _rhs;
rhs = _lhs;
lhsStride = _rhsStride;
rhsStride = _lhsStride;
cols = _rows;
rows = _cols;
lhsRowMajor = !_rhsRowMajor;
ei_assert(_lhsRowMajor);
}
else
{
lhs = _lhs;
rhs = _rhs;
lhsStride = _lhsStride;
rhsStride = _rhsStride;
rows = _rows;
cols = _cols;
lhsRowMajor = _lhsRowMajor;
ei_assert(!_rhsRowMajor);
}
typedef typename ei_packet_traits<Scalar>::type PacketType;
#ifndef EIGEN_USE_ALT_PRODUCT
enum {
PacketSize = sizeof(PacketType)/sizeof(Scalar),
#if (defined __i386__)
HalfRegisterCount = 4,
#else
HalfRegisterCount = 8,
#endif
// register block size along the N direction
nr = HalfRegisterCount/2,
// register block size along the M direction
mr = 2 * PacketSize,
// max cache block size along the K direction
Max_kc = ei_L2_block_traits<EIGEN_TUNE_FOR_CPU_CACHE_SIZE,Scalar>::width,
// max cache block size along the M direction
Max_mc = 2*Max_kc
};
int kc = std::min<int>(Max_kc,depth); // cache block size along the K direction
int mc = std::min<int>(Max_mc,rows); // cache block size along the M direction
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
Scalar* blockB = ei_aligned_stack_new(Scalar, kc*cols*PacketSize);
// number of columns which can be processed by packet of nr columns
int packet_cols = (cols/nr)*nr;
// GEMM_VAR1
for(int k2=0; k2<depth; k2+=kc)
{
const int actual_kc = std::min(k2+kc,depth)-k2;
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
// and expand each coeff to a constant packet for further reuse
{
int count = 0;
for(int j2=0; j2<packet_cols; j2+=nr)
{
const Scalar* b0 = &rhs[(j2+0)*rhsStride + k2];
const Scalar* b1 = &rhs[(j2+1)*rhsStride + k2];
const Scalar* b2 = &rhs[(j2+2)*rhsStride + k2];
const Scalar* b3 = &rhs[(j2+3)*rhsStride + k2];
for(int k=0; k<actual_kc; k++)
{
ei_pstore(&blockB[count+0*PacketSize], ei_pset1(b0[k]));
ei_pstore(&blockB[count+1*PacketSize], ei_pset1(b1[k]));
if (nr==4)
{
ei_pstore(&blockB[count+2*PacketSize], ei_pset1(b2[k]));
ei_pstore(&blockB[count+3*PacketSize], ei_pset1(b3[k]));
}
count += nr*PacketSize;
}
}
}
// => GEPP_VAR1
for(int i2=0; i2<rows; i2+=mc)
{
const int actual_mc = std::min(i2+mc,rows)-i2;
// We have selected a mc x kc block of lhs
// Let's pack it in a clever order for further purely sequential access
int count = 0;
const int peeled_mc = (actual_mc/mr)*mr;
if (lhsRowMajor)
{
for(int i=0; i<peeled_mc; i+=mr)
for(int k=0; k<actual_kc; k++)
for(int w=0; w<mr; w++)
blockA[count++] = lhs[(k2+k) + (i2+i+w)*lhsStride];
for(int i=peeled_mc; i<actual_mc; i++)
{
const Scalar* llhs = &lhs[(k2) + (i2+i)*lhsStride];
for(int k=0; k<actual_kc; k++)
blockA[count++] = llhs[k];
}
}
else
{
for(int i=0; i<peeled_mc; i+=mr)
for(int k=0; k<actual_kc; k++)
for(int w=0; w<mr; w++)
blockA[count++] = lhs[(k2+k)*lhsStride + i2+i+w];
for(int i=peeled_mc; i<actual_mc; i++)
for(int k=0; k<actual_kc; k++)
blockA[count++] = lhs[(k2+k)*lhsStride + i2+i];
}
// GEBP
// loops on each cache friendly block of the result/rhs
for(int j2=0; j2<packet_cols; j2+=nr)
{
// loops on each register blocking of lhs/res
const int peeled_mc = (actual_mc/mr)*mr;
for(int i=0; i<peeled_mc; i+=mr)
{
const Scalar* blA = &blockA[i*actual_kc];
#ifdef EIGEN_VECTORIZE_SSE
_mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
#endif
// TODO move the res loads to the stores
// gets res block as register
PacketType C0, C1, C2, C3, C4, C5, C6, C7;
C0 = ei_ploadu(&res[(j2+0)*resStride + i2 + i]);
C1 = ei_ploadu(&res[(j2+1)*resStride + i2 + i]);
if(nr==4) C2 = ei_ploadu(&res[(j2+2)*resStride + i2 + i]);
if(nr==4) C3 = ei_ploadu(&res[(j2+3)*resStride + i2 + i]);
C4 = ei_ploadu(&res[(j2+0)*resStride + i2 + i + PacketSize]);
C5 = ei_ploadu(&res[(j2+1)*resStride + i2 + i + PacketSize]);
if(nr==4) C6 = ei_ploadu(&res[(j2+2)*resStride + i2 + i + PacketSize]);
if(nr==4) C7 = ei_ploadu(&res[(j2+3)*resStride + i2 + i + PacketSize]);
// performs "inner" product
// TODO let's check wether the flowing peeled loop could not be
// optimized via optimal prefetching from one loop to the other
const Scalar* blB = &blockB[j2*actual_kc*PacketSize];
const int peeled_kc = (actual_kc/4)*4;
for(int k=0; k<peeled_kc; k+=4)
{
PacketType B0, B1, B2, B3, A0, A1;
A0 = ei_pload(&blA[0*PacketSize]);
A1 = ei_pload(&blA[1*PacketSize]);
B0 = ei_pload(&blB[0*PacketSize]);
B1 = ei_pload(&blB[1*PacketSize]);
C0 = ei_pmadd(B0, A0, C0);
if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
C4 = ei_pmadd(B0, A1, C4);
if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
C1 = ei_pmadd(B1, A0, C1);
C5 = ei_pmadd(B1, A1, C5);
B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
if(nr==4) C2 = ei_pmadd(B2, A0, C2);
if(nr==4) C6 = ei_pmadd(B2, A1, C6);
if(nr==4) B2 = ei_pload(&blB[6*PacketSize]);
if(nr==4) C3 = ei_pmadd(B3, A0, C3);
A0 = ei_pload(&blA[2*PacketSize]);
if(nr==4) C7 = ei_pmadd(B3, A1, C7);
A1 = ei_pload(&blA[3*PacketSize]);
if(nr==4) B3 = ei_pload(&blB[7*PacketSize]);
C0 = ei_pmadd(B0, A0, C0);
C4 = ei_pmadd(B0, A1, C4);
B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
C1 = ei_pmadd(B1, A0, C1);
C5 = ei_pmadd(B1, A1, C5);
B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
if(nr==4) C2 = ei_pmadd(B2, A0, C2);
if(nr==4) C6 = ei_pmadd(B2, A1, C6);
if(nr==4) B2 = ei_pload(&blB[10*PacketSize]);
if(nr==4) C3 = ei_pmadd(B3, A0, C3);
A0 = ei_pload(&blA[4*PacketSize]);
if(nr==4) C7 = ei_pmadd(B3, A1, C7);
A1 = ei_pload(&blA[5*PacketSize]);
if(nr==4) B3 = ei_pload(&blB[11*PacketSize]);
C0 = ei_pmadd(B0, A0, C0);
C4 = ei_pmadd(B0, A1, C4);
B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
C1 = ei_pmadd(B1, A0, C1);
C5 = ei_pmadd(B1, A1, C5);
B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
if(nr==4) C2 = ei_pmadd(B2, A0, C2);
if(nr==4) C6 = ei_pmadd(B2, A1, C6);
if(nr==4) B2 = ei_pload(&blB[14*PacketSize]);
if(nr==4) C3 = ei_pmadd(B3, A0, C3);
A0 = ei_pload(&blA[6*PacketSize]);
if(nr==4) C7 = ei_pmadd(B3, A1, C7);
A1 = ei_pload(&blA[7*PacketSize]);
if(nr==4) B3 = ei_pload(&blB[15*PacketSize]);
C0 = ei_pmadd(B0, A0, C0);
C4 = ei_pmadd(B0, A1, C4);
C1 = ei_pmadd(B1, A0, C1);
C5 = ei_pmadd(B1, A1, C5);
if(nr==4) C2 = ei_pmadd(B2, A0, C2);
if(nr==4) C6 = ei_pmadd(B2, A1, C6);
if(nr==4) C3 = ei_pmadd(B3, A0, C3);
if(nr==4) C7 = ei_pmadd(B3, A1, C7);
blB += 4*nr*PacketSize;
blA += 4*mr;
}
// process remaining peeled loop
for(int k=peeled_kc; k<actual_kc; k++)
{
PacketType B0, B1, B2, B3, A0, A1;
A0 = ei_pload(&blA[0*PacketSize]);
A1 = ei_pload(&blA[1*PacketSize]);
B0 = ei_pload(&blB[0*PacketSize]);
B1 = ei_pload(&blB[1*PacketSize]);
C0 = ei_pmadd(B0, A0, C0);
if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
C4 = ei_pmadd(B0, A1, C4);
if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
C1 = ei_pmadd(B1, A0, C1);
C5 = ei_pmadd(B1, A1, C5);
if(nr==4) C2 = ei_pmadd(B2, A0, C2);
if(nr==4) C6 = ei_pmadd(B2, A1, C6);
if(nr==4) C3 = ei_pmadd(B3, A0, C3);
if(nr==4) C7 = ei_pmadd(B3, A1, C7);
blB += nr*PacketSize;
blA += mr;
}
ei_pstoreu(&res[(j2+0)*resStride + i2 + i], C0);
ei_pstoreu(&res[(j2+1)*resStride + i2 + i], C1);
if(nr==4) ei_pstoreu(&res[(j2+2)*resStride + i2 + i], C2);
if(nr==4) ei_pstoreu(&res[(j2+3)*resStride + i2 + i], C3);
ei_pstoreu(&res[(j2+0)*resStride + i2 + i + PacketSize], C4);
ei_pstoreu(&res[(j2+1)*resStride + i2 + i + PacketSize], C5);
if(nr==4) ei_pstoreu(&res[(j2+2)*resStride + i2 + i + PacketSize], C6);
if(nr==4) ei_pstoreu(&res[(j2+3)*resStride + i2 + i + PacketSize], C7);
}
for(int i=peeled_mc; i<actual_mc; i++)
{
const Scalar* blA = &blockA[i*actual_kc];
#ifdef EIGEN_VECTORIZE_SSE
_mm_prefetch((const char*)(&blA[0]), _MM_HINT_T0);
#endif
// gets a 1 x nr res block as registers
Scalar C0(0), C1(0), C2(0), C3(0);
const Scalar* blB = &blockB[j2*actual_kc*PacketSize];
for(int k=0; k<actual_kc; k++)
{
Scalar B0, B1, B2, B3, A0;
A0 = blA[k];
B0 = blB[0*PacketSize];
B1 = blB[1*PacketSize];
C0 += B0 * A0;
if(nr==4) B2 = blB[2*PacketSize];
if(nr==4) B3 = blB[3*PacketSize];
C1 += B1 * A0;
if(nr==4) C2 += B2 * A0;
if(nr==4) C3 += B3 * A0;
blB += nr*PacketSize;
}
res[(j2+0)*resStride + i2 + i] += C0;
res[(j2+1)*resStride + i2 + i] += C1;
if(nr==4) res[(j2+2)*resStride + i2 + i] += C2;
if(nr==4) res[(j2+3)*resStride + i2 + i] += C3;
}
}
// remaining rhs/res columns (<nr)
for(int j2=packet_cols; j2<cols; j2++)
{
for(int i=0; i<actual_mc; i++)
{
Scalar c0 = res[(j2)*resStride + i2+i];
if (lhsRowMajor)
for(int k=0; k<actual_kc; k++)
c0 += lhs[(k2+k)+(i2+i)*lhsStride] * rhs[j2*rhsStride + k2 + k];
else
for(int k=0; k<actual_kc; k++)
c0 += lhs[(k2+k)*lhsStride + i2+i] * rhs[j2*rhsStride + k2 + k];
res[(j2)*resStride + i2+i] = c0;
}
}
}
}
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
ei_aligned_stack_delete(Scalar, blockB, kc*cols*PacketSize);
#else // alternate product from cylmor
enum {
PacketSize = sizeof(PacketType)/sizeof(Scalar),
#if (defined __i386__)
// i386 architecture provides only 8 xmm registers,
// so let's reduce the max number of rows processed at once.
MaxBlockRows = 4,
MaxBlockRows_ClampingMask = 0xFFFFFC,
#else
MaxBlockRows = 8,
MaxBlockRows_ClampingMask = 0xFFFFF8,
#endif
// maximal size of the blocks fitted in L2 cache
MaxL2BlockSize = ei_L2_block_traits<EIGEN_TUNE_FOR_CPU_CACHE_SIZE,Scalar>::width
};
const bool resIsAligned = (PacketSize==1) || (((resStride%PacketSize) == 0) && (size_t(res)%16==0));
const int remainingSize = depth % PacketSize;
const int size = depth - remainingSize; // third dimension of the product clamped to packet boundaries
const int l2BlockRows = MaxL2BlockSize > rows ? rows : 512;
const int l2BlockCols = MaxL2BlockSize > cols ? cols : 128;
const int l2BlockSize = MaxL2BlockSize > size ? size : 256;
const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize;
const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
Scalar* EIGEN_RESTRICT block = new Scalar[l2BlockRows*size];
// for(int i=0; i<l2BlockRows*l2BlockSize; ++i)
// block[i] = 0;
// loops on each L2 cache friendly blocks of lhs
for(int l2k=0; l2k<depth; l2k+=l2BlockSize)
{
for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
{
// We have selected a block of lhs
// Packs this block into 'block'
int count = 0;
for(int k=0; k<l2BlockSize; k+=MaxBlockRows)
{
for(int i=0; i<l2BlockRows; i+=2*PacketSize)
for (int w=0; w<MaxBlockRows; ++w)
for (int y=0; y<2*PacketSize; ++y)
block[count++] = lhs[(k+l2k+w)*lhsStride + l2i+i+ y];
}
// loops on each L2 cache firendly block of the result/rhs
for(int l2j=0; l2j<cols; l2j+=l2BlockCols)
{
for(int k=0; k<l2BlockSize; k+=MaxBlockRows)
{
for(int j=0; j<l2BlockCols; ++j)
{
PacketType A0, A1, A2, A3, A4, A5, A6, A7;
// Load the packets from rhs and reorder them
// Here we need some vector reordering
// Right now its hardcoded to packets of 4 elements
const Scalar* lrhs = &rhs[(j+l2j)*rhsStride+(k+l2k)];
A0 = ei_pset1(lrhs[0]);
A1 = ei_pset1(lrhs[1]);
A2 = ei_pset1(lrhs[2]);
A3 = ei_pset1(lrhs[3]);
if (MaxBlockRows==8)
{
A4 = ei_pset1(lrhs[4]);
A5 = ei_pset1(lrhs[5]);
A6 = ei_pset1(lrhs[6]);
A7 = ei_pset1(lrhs[7]);
}
Scalar * lb = &block[l2BlockRows * k];
for(int i=0; i<l2BlockRows; i+=2*PacketSize)
{
PacketType R0, R1, L0, L1, T0, T1;
// We perform "cross products" of vectors to avoid
// reductions (horizontal ops) afterwards
T0 = ei_pload(&res[(j+l2j)*resStride+l2i+i]);
T1 = ei_pload(&res[(j+l2j)*resStride+l2i+i+PacketSize]);
R0 = ei_pload(&lb[0*PacketSize]);
L0 = ei_pload(&lb[1*PacketSize]);
R1 = ei_pload(&lb[2*PacketSize]);
L1 = ei_pload(&lb[3*PacketSize]);
T0 = ei_pmadd(R0, A0, T0);
T1 = ei_pmadd(L0, A0, T1);
R0 = ei_pload(&lb[4*PacketSize]);
L0 = ei_pload(&lb[5*PacketSize]);
T0 = ei_pmadd(R1, A1, T0);
T1 = ei_pmadd(L1, A1, T1);
R1 = ei_pload(&lb[6*PacketSize]);
L1 = ei_pload(&lb[7*PacketSize]);
T0 = ei_pmadd(R0, A2, T0);
T1 = ei_pmadd(L0, A2, T1);
if(MaxBlockRows==8)
{
R0 = ei_pload(&lb[8*PacketSize]);
L0 = ei_pload(&lb[9*PacketSize]);
}
T0 = ei_pmadd(R1, A3, T0);
T1 = ei_pmadd(L1, A3, T1);
if(MaxBlockRows==8)
{
R1 = ei_pload(&lb[10*PacketSize]);
L1 = ei_pload(&lb[11*PacketSize]);
T0 = ei_pmadd(R0, A4, T0);
T1 = ei_pmadd(L0, A4, T1);
R0 = ei_pload(&lb[12*PacketSize]);
L0 = ei_pload(&lb[13*PacketSize]);
T0 = ei_pmadd(R1, A5, T0);
T1 = ei_pmadd(L1, A5, T1);
R1 = ei_pload(&lb[14*PacketSize]);
L1 = ei_pload(&lb[15*PacketSize]);
T0 = ei_pmadd(R0, A6, T0);
T1 = ei_pmadd(L0, A6, T1);
T0 = ei_pmadd(R1, A7, T0);
T1 = ei_pmadd(L1, A7, T1);
}
lb += MaxBlockRows*2*PacketSize;
ei_pstore(&res[(j+l2j)*resStride+l2i+i], T0);
ei_pstore(&res[(j+l2j)*resStride+l2i+i+PacketSize], T1);
}
}
}
}
}
}
delete[] block;
#endif
}
#endif // EIGEN_EXTERN_INSTANTIATIONS
#endif // EIGEN_GENERAL_MATRIX_MATRIX_H

View File

@@ -22,327 +22,8 @@
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_CACHE_FRIENDLY_PRODUCT_H
#define EIGEN_CACHE_FRIENDLY_PRODUCT_H
template <int L2MemorySize,typename Scalar>
struct ei_L2_block_traits {
enum {width = 8 * ei_meta_sqrt<L2MemorySize/(64*sizeof(Scalar))>::ret };
};
#ifndef EIGEN_EXTERN_INSTANTIATIONS
template<typename Scalar>
static void ei_cache_friendly_product(
int _rows, int _cols, int depth,
bool _lhsRowMajor, const Scalar* _lhs, int _lhsStride,
bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride,
bool resRowMajor, Scalar* res, int resStride)
{
const Scalar* EIGEN_RESTRICT lhs;
const Scalar* EIGEN_RESTRICT rhs;
int lhsStride, rhsStride, rows, cols;
bool lhsRowMajor;
if (resRowMajor)
{
lhs = _rhs;
rhs = _lhs;
lhsStride = _rhsStride;
rhsStride = _lhsStride;
cols = _rows;
rows = _cols;
lhsRowMajor = !_rhsRowMajor;
ei_assert(_lhsRowMajor);
}
else
{
lhs = _lhs;
rhs = _rhs;
lhsStride = _lhsStride;
rhsStride = _rhsStride;
rows = _rows;
cols = _cols;
lhsRowMajor = _lhsRowMajor;
ei_assert(!_rhsRowMajor);
}
typedef typename ei_packet_traits<Scalar>::type PacketType;
enum {
PacketSize = sizeof(PacketType)/sizeof(Scalar),
#if (defined __i386__)
// i386 architecture provides only 8 xmm registers,
// so let's reduce the max number of rows processed at once.
MaxBlockRows = 4,
MaxBlockRows_ClampingMask = 0xFFFFFC,
#else
MaxBlockRows = 8,
MaxBlockRows_ClampingMask = 0xFFFFF8,
#endif
// maximal size of the blocks fitted in L2 cache
MaxL2BlockSize = ei_L2_block_traits<EIGEN_TUNE_FOR_CPU_CACHE_SIZE,Scalar>::width
};
const bool resIsAligned = (PacketSize==1) || (((resStride%PacketSize) == 0) && (size_t(res)%16==0));
const int remainingSize = depth % PacketSize;
const int size = depth - remainingSize; // third dimension of the product clamped to packet boundaries
const int l2BlockRows = MaxL2BlockSize > rows ? rows : MaxL2BlockSize;
const int l2BlockCols = MaxL2BlockSize > cols ? cols : MaxL2BlockSize;
const int l2BlockSize = MaxL2BlockSize > size ? size : MaxL2BlockSize;
const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize;
const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
Scalar* EIGEN_RESTRICT block = 0;
const int allocBlockSize = l2BlockRows*size;
block = ei_aligned_stack_new(Scalar, allocBlockSize);
Scalar* EIGEN_RESTRICT rhsCopy
= ei_aligned_stack_new(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
// loops on each L2 cache friendly blocks of the result
for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
{
const int l2blockRowEnd = std::min(l2i+l2BlockRows, rows);
const int l2blockRowEndBW = l2blockRowEnd & MaxBlockRows_ClampingMask; // end of the rows aligned to bw
const int l2blockRemainingRows = l2blockRowEnd - l2blockRowEndBW; // number of remaining rows
//const int l2blockRowEndBWPlusOne = l2blockRowEndBW + (l2blockRemainingRows?0:MaxBlockRows);
// build a cache friendly blocky matrix
int count = 0;
// copy l2blocksize rows of m_lhs to blocks of ps x bw
for(int l2k=0; l2k<size; l2k+=l2BlockSize)
{
const int l2blockSizeEnd = std::min(l2k+l2BlockSize, size);
for (int i = l2i; i<l2blockRowEndBW/*PlusOne*/; i+=MaxBlockRows)
{
// TODO merge the "if l2blockRemainingRows" using something like:
// const int blockRows = std::min(i+MaxBlockRows, rows) - i;
for (int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
{
// TODO write these loops using meta unrolling
// negligible for large matrices but useful for small ones
if (lhsRowMajor)
{
for (int w=0; w<MaxBlockRows; ++w)
for (int s=0; s<PacketSize; ++s)
block[count++] = lhs[(i+w)*lhsStride + (k+s)];
}
else
{
for (int w=0; w<MaxBlockRows; ++w)
for (int s=0; s<PacketSize; ++s)
block[count++] = lhs[(i+w) + (k+s)*lhsStride];
}
}
}
if (l2blockRemainingRows>0)
{
for (int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
{
if (lhsRowMajor)
{
for (int w=0; w<l2blockRemainingRows; ++w)
for (int s=0; s<PacketSize; ++s)
block[count++] = lhs[(l2blockRowEndBW+w)*lhsStride + (k+s)];
}
else
{
for (int w=0; w<l2blockRemainingRows; ++w)
for (int s=0; s<PacketSize; ++s)
block[count++] = lhs[(l2blockRowEndBW+w) + (k+s)*lhsStride];
}
}
}
}
for(int l2j=0; l2j<cols; l2j+=l2BlockCols)
{
int l2blockColEnd = std::min(l2j+l2BlockCols, cols);
for(int l2k=0; l2k<size; l2k+=l2BlockSize)
{
// acumulate bw rows of lhs time a single column of rhs to a bw x 1 block of res
int l2blockSizeEnd = std::min(l2k+l2BlockSize, size);
// if not aligned, copy the rhs block
if (needRhsCopy)
for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
{
ei_internal_assert(l2BlockSizeAligned*(l1j-l2j)+(l2blockSizeEnd-l2k) < l2BlockSizeAligned*l2BlockSizeAligned);
memcpy(rhsCopy+l2BlockSizeAligned*(l1j-l2j),&(rhs[l1j*rhsStride+l2k]),(l2blockSizeEnd-l2k)*sizeof(Scalar));
}
// for each bw x 1 result's block
for(int l1i=l2i; l1i<l2blockRowEndBW; l1i+=MaxBlockRows)
{
int offsetblock = l2k * (l2blockRowEnd-l2i) + (l1i-l2i)*(l2blockSizeEnd-l2k) - l2k*MaxBlockRows;
const Scalar* EIGEN_RESTRICT localB = &block[offsetblock];
for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
{
const Scalar* EIGEN_RESTRICT rhsColumn;
if (needRhsCopy)
rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
else
rhsColumn = &(rhs[l1j*rhsStride]);
PacketType dst[MaxBlockRows];
dst[3] = dst[2] = dst[1] = dst[0] = ei_pset1(Scalar(0.));
if (MaxBlockRows==8)
dst[7] = dst[6] = dst[5] = dst[4] = dst[0];
PacketType tmp;
for(int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
{
tmp = ei_ploadu(&rhsColumn[k]);
PacketType A0, A1, A2, A3, A4, A5;
A0 = ei_pload(localB + k*MaxBlockRows);
A1 = ei_pload(localB + k*MaxBlockRows+1*PacketSize);
A2 = ei_pload(localB + k*MaxBlockRows+2*PacketSize);
A3 = ei_pload(localB + k*MaxBlockRows+3*PacketSize);
if (MaxBlockRows==8) A4 = ei_pload(localB + k*MaxBlockRows+4*PacketSize);
if (MaxBlockRows==8) A5 = ei_pload(localB + k*MaxBlockRows+5*PacketSize);
dst[0] = ei_pmadd(tmp, A0, dst[0]);
if (MaxBlockRows==8) A0 = ei_pload(localB + k*MaxBlockRows+6*PacketSize);
dst[1] = ei_pmadd(tmp, A1, dst[1]);
if (MaxBlockRows==8) A1 = ei_pload(localB + k*MaxBlockRows+7*PacketSize);
dst[2] = ei_pmadd(tmp, A2, dst[2]);
dst[3] = ei_pmadd(tmp, A3, dst[3]);
if (MaxBlockRows==8)
{
dst[4] = ei_pmadd(tmp, A4, dst[4]);
dst[5] = ei_pmadd(tmp, A5, dst[5]);
dst[6] = ei_pmadd(tmp, A0, dst[6]);
dst[7] = ei_pmadd(tmp, A1, dst[7]);
}
}
Scalar* EIGEN_RESTRICT localRes = &(res[l1i + l1j*resStride]);
if (PacketSize>1 && resIsAligned)
{
// the result is aligned: let's do packet reduction
ei_pstore(&(localRes[0]), ei_padd(ei_pload(&(localRes[0])), ei_preduxp(&dst[0])));
if (PacketSize==2)
ei_pstore(&(localRes[2]), ei_padd(ei_pload(&(localRes[2])), ei_preduxp(&(dst[2]))));
if (MaxBlockRows==8)
{
ei_pstore(&(localRes[4]), ei_padd(ei_pload(&(localRes[4])), ei_preduxp(&(dst[4]))));
if (PacketSize==2)
ei_pstore(&(localRes[6]), ei_padd(ei_pload(&(localRes[6])), ei_preduxp(&(dst[6]))));
}
}
else
{
// not aligned => per coeff packet reduction
localRes[0] += ei_predux(dst[0]);
localRes[1] += ei_predux(dst[1]);
localRes[2] += ei_predux(dst[2]);
localRes[3] += ei_predux(dst[3]);
if (MaxBlockRows==8)
{
localRes[4] += ei_predux(dst[4]);
localRes[5] += ei_predux(dst[5]);
localRes[6] += ei_predux(dst[6]);
localRes[7] += ei_predux(dst[7]);
}
}
}
}
if (l2blockRemainingRows>0)
{
int offsetblock = l2k * (l2blockRowEnd-l2i) + (l2blockRowEndBW-l2i)*(l2blockSizeEnd-l2k) - l2k*l2blockRemainingRows;
const Scalar* localB = &block[offsetblock];
for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
{
const Scalar* EIGEN_RESTRICT rhsColumn;
if (needRhsCopy)
rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
else
rhsColumn = &(rhs[l1j*rhsStride]);
PacketType dst[MaxBlockRows];
dst[3] = dst[2] = dst[1] = dst[0] = ei_pset1(Scalar(0.));
if (MaxBlockRows==8)
dst[7] = dst[6] = dst[5] = dst[4] = dst[0];
// let's declare a few other temporary registers
PacketType tmp;
for(int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
{
tmp = ei_pload(&rhsColumn[k]);
dst[0] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows ])), dst[0]);
if (l2blockRemainingRows>=2) dst[1] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+ PacketSize])), dst[1]);
if (l2blockRemainingRows>=3) dst[2] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+2*PacketSize])), dst[2]);
if (l2blockRemainingRows>=4) dst[3] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+3*PacketSize])), dst[3]);
if (MaxBlockRows==8)
{
if (l2blockRemainingRows>=5) dst[4] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+4*PacketSize])), dst[4]);
if (l2blockRemainingRows>=6) dst[5] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+5*PacketSize])), dst[5]);
if (l2blockRemainingRows>=7) dst[6] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+6*PacketSize])), dst[6]);
if (l2blockRemainingRows>=8) dst[7] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+7*PacketSize])), dst[7]);
}
}
Scalar* EIGEN_RESTRICT localRes = &(res[l2blockRowEndBW + l1j*resStride]);
// process the remaining rows once at a time
localRes[0] += ei_predux(dst[0]);
if (l2blockRemainingRows>=2) localRes[1] += ei_predux(dst[1]);
if (l2blockRemainingRows>=3) localRes[2] += ei_predux(dst[2]);
if (l2blockRemainingRows>=4) localRes[3] += ei_predux(dst[3]);
if (MaxBlockRows==8)
{
if (l2blockRemainingRows>=5) localRes[4] += ei_predux(dst[4]);
if (l2blockRemainingRows>=6) localRes[5] += ei_predux(dst[5]);
if (l2blockRemainingRows>=7) localRes[6] += ei_predux(dst[6]);
if (l2blockRemainingRows>=8) localRes[7] += ei_predux(dst[7]);
}
}
}
}
}
}
if (PacketSize>1 && remainingSize)
{
if (lhsRowMajor)
{
for (int j=0; j<cols; ++j)
for (int i=0; i<rows; ++i)
{
Scalar tmp = lhs[i*lhsStride+size] * rhs[j*rhsStride+size];
// FIXME this loop get vectorized by the compiler !
for (int k=1; k<remainingSize; ++k)
tmp += lhs[i*lhsStride+size+k] * rhs[j*rhsStride+size+k];
res[i+j*resStride] += tmp;
}
}
else
{
for (int j=0; j<cols; ++j)
for (int i=0; i<rows; ++i)
{
Scalar tmp = lhs[i+size*lhsStride] * rhs[j*rhsStride+size];
for (int k=1; k<remainingSize; ++k)
tmp += lhs[i+(size+k)*lhsStride] * rhs[j*rhsStride+size+k];
res[i+j*resStride] += tmp;
}
}
}
ei_aligned_stack_delete(Scalar, block, allocBlockSize);
ei_aligned_stack_delete(Scalar, rhsCopy, l2BlockSizeAligned*l2BlockSizeAligned);
}
#endif // EIGEN_EXTERN_INSTANTIATIONS
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
#define EIGEN_GENERAL_MATRIX_VECTOR_H
/* Optimized col-major matrix * vector product:
* This algorithm processes 4 columns at onces that allows to both reduce
@@ -495,7 +176,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_colmajor_times_vector(
/* process remaining coeffs (or all if there is no explicit vectorization) */
for (int j=alignedSize; j<size; ++j)
res[j] += ei_pfirst(ptmp0)*lhs0[j] + ei_pfirst(ptmp1)*lhs1[j] + ei_pfirst(ptmp2)*lhs2[j] + ei_pfirst(ptmp3)*lhs3[j];
res[j] += ei_pfirst(ptmp0)*lhs0[j] + ei_pfirst(ptmp1)*lhs1[j] + ei_pfirst(ptmp2)*lhs2[j] + ei_pfirst(ptmp3)*lhs3[j];
}
// process remaining first and last columns (at most columnsAtOnce-1)
@@ -750,4 +431,4 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
#undef _EIGEN_ACCUMULATE_PACKETS
}
#endif // EIGEN_CACHE_FRIENDLY_PRODUCT_H
#endif // EIGEN_GENERAL_MATRIX_VECTOR_H

View File

@@ -0,0 +1,146 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
template<bool Conjugate> struct ei_conj_if {
template<typename Scalar> Scalar operator() (const Scalar& x) const { return ei_conj(x); }
};
template<> struct ei_conj_if<false> {
template<typename Scalar> Scalar& operator() (Scalar& x) const { return x; }
};
/* Optimized col-major selfadjoint matrix * vector product:
* This algorithm processes 2 columns at onces that allows to both reduce
* the number of load/stores of the result by a factor 2 and to reduce
* the instruction dependency.
*/
template<typename Scalar, int StorageOrder, int UpLo>
static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
int size,
const Scalar* lhs, int lhsStride,
const Scalar* rhs, //int rhsIncr,
Scalar* res)
{
typedef typename ei_packet_traits<Scalar>::type Packet;
const int PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
IsRowMajor = StorageOrder==RowMajorBit ? 1 : 0,
IsLower = UpLo == LowerTriangularBit ? 1 : 0,
FirstTriangular = IsRowMajor == IsLower
};
ei_conj_if<NumTraits<Scalar>::IsComplex && IsRowMajor> conj0;
ei_conj_if<NumTraits<Scalar>::IsComplex && !IsRowMajor> conj1;
for (int i=0;i<size;i++)
res[i] = 0;
int bound = std::max(0,size-8) & 0xfffffffE;
if (FirstTriangular)
bound = size - bound;
for (int j=FirstTriangular ? bound : 0;
j<(FirstTriangular ? size : bound);j+=2)
{
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = rhs[j];
Packet ptmp0 = ei_pset1(t0);
Scalar t1 = rhs[j+1];
Packet ptmp1 = ei_pset1(t1);
Scalar t2 = 0;
Packet ptmp2 = ei_pset1(t2);
Scalar t3 = 0;
Packet ptmp3 = ei_pset1(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedEnd = starti;
size_t alignedStart = (starti) + ei_alignmentOffset(&res[starti], endi-starti);
alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
res[j] += t0 * conj0(A0[j]);
if(FirstTriangular)
{
res[j+1] += t1 * conj0(A1[j+1]);
res[j] += t1 * conj0(A1[j]);
t3 += conj1(A1[j]) * rhs[j];
}
else
{
res[j+1] += t0 * conj0(A0[j+1]) + t1 * conj0(A1[j+1]);
t2 += conj1(A0[j+1]) * rhs[j+1];
}
for (size_t i=starti; i<alignedStart; ++i)
{
res[i] += t0 * A0[i] + t1 * A1[i];
t2 += ei_conj(A0[i]) * rhs[i];
t3 += ei_conj(A1[i]) * rhs[i];
}
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet A0i = ei_ploadu(&A0[i]);
Packet A1i = ei_ploadu(&A1[i]);
Packet Bi = ei_ploadu(&rhs[i]); // FIXME should be aligned in most cases
Packet Xi = ei_pload(&res[i]);
Xi = ei_padd(ei_padd(Xi, ei_pmul(ptmp0, conj0(A0i))), ei_pmul(ptmp1, conj0(A1i)));
ptmp2 = ei_padd(ptmp2, ei_pmul(conj1(A0i), Bi));
ptmp3 = ei_padd(ptmp3, ei_pmul(conj1(A1i), Bi));
ei_pstore(&res[i],Xi);
}
for (size_t i=alignedEnd; i<endi; i++)
{
res[i] += t0 * conj0(A0[i]) + t1 * conj0(A1[i]);
t2 += conj1(A0[i]) * rhs[i];
t3 += conj1(A1[i]) * rhs[i];
}
res[j] += t2 + ei_predux(ptmp2);
res[j+1] += t3 + ei_predux(ptmp3);
}
for (int j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
{
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = rhs[j];
Scalar t2 = 0;
res[j] += t1 * conj0(A0[j]);
for (int i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) {
res[i] += t1 * conj0(A0[i]);
t2 += conj1(A0[i]) * rhs[i];
}
res[j] += t2;
}
}
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Core_util_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_util_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel
)

View File

@@ -30,17 +30,19 @@
* stored in some runtime variable.
*
* Explanation for the choice of this value:
* - It should be positive and larger than any reasonable compile-time-fixed number of rows or columns.
* - It should be positive and larger than the number of entries in any reasonable fixed-size matrix.
* This allows to simplify many compile-time conditions throughout Eigen.
* - It should be smaller than the sqrt of INT_MAX. Indeed, we often multiply a number of rows with a number
* of columns in order to compute a number of coefficients. Even if we guard that with an "if" checking whether
* the values are Dynamic, we still get a compiler warning "integer overflow". So the only way to get around
* it would be a meta-selector. Doing this everywhere would reduce code readability and lenghten compilation times.
* Also, disabling compiler warnings for integer overflow, sounds like a bad idea.
* - It should be a prime number, because for example the old value 10000 led to bugs with 100x100 matrices.
*
* If you wish to port Eigen to a platform where sizeof(int)==2, it is perfectly possible to set Dynamic to, say, 100.
* If you wish to port Eigen to a platform where sizeof(int)==2, it is perfectly possible to set Dynamic to, say, 97.
* However, changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
*/
const int Dynamic = 10000;
const int Dynamic = 33331;
/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
* The value Infinity there means the L-infinity norm.
@@ -138,7 +140,7 @@ const unsigned int LinearAccessBit = 0x10;
* First, references to the coefficients must be available through coeffRef(int, int). This rules out read-only
* expressions whose coefficients are computed on demand by coeff(int, int). Second, the memory layout of the
* array of coefficients must be exactly the natural one suggested by rows(), cols(), stride(), and the RowMajorBit.
* This rules out expressions such as DiagonalCoeffs, whose coefficients, though referencable, do not have
* This rules out expressions such as Diagonal, whose coefficients, though referencable, do not have
* such a regular memory layout.
*/
const unsigned int DirectAccessBit = 0x20;
@@ -184,23 +186,30 @@ const unsigned int HereditaryBits = RowMajorBit
| EvalBeforeAssigningBit
| SparseBit;
// Possible values for the Mode parameter of part() and of extract()
// diagonal means both upper and lower triangular
const unsigned DiagonalBits = UpperTriangularBit | LowerTriangularBit;
// Possible values for the Mode parameter of part()
const unsigned int UpperTriangular = UpperTriangularBit;
const unsigned int StrictlyUpperTriangular = UpperTriangularBit | ZeroDiagBit;
const unsigned int LowerTriangular = LowerTriangularBit;
const unsigned int StrictlyLowerTriangular = LowerTriangularBit | ZeroDiagBit;
const unsigned int SelfAdjoint = SelfAdjointBit;
// additional possible values for the Mode parameter of extract()
const unsigned int UnitUpperTriangular = UpperTriangularBit | UnitDiagBit;
const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit;
const unsigned int Diagonal = UpperTriangular | LowerTriangular;
template<typename T> struct ei_is_diagonal
{
enum {
ret = ( (unsigned int)(T::Flags) & DiagonalBits ) == DiagonalBits
};
};
enum { Aligned, Unaligned };
enum { ForceAligned, AsRequested };
enum { ConditionalJumpCost = 5 };
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
enum DirectionType { Vertical, Horizontal };
enum DirectionType { Vertical, Horizontal, BothDirections };
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct, SparseTimeSparseProduct, SparseTimeDenseProduct, DenseTimeSparseProduct };
enum {
@@ -225,11 +234,11 @@ enum {
enum {
ColMajor = 0,
RowMajor = 0x1, // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
/** \internal Align the matrix itself if it is vectorizable fixed-size */
AutoAlign = 0,
/** \internal Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be
requested to be aligned) */
DontAlign = 0,
/** \internal Align the matrix itself if it is vectorizable fixed-size */
AutoAlign = 0x2
DontAlign = 0x2
};
enum {
@@ -239,4 +248,23 @@ enum {
HasDirectAccess = DirectAccessBit
};
enum TransformTraits {
Isometry = 0x1,
Affine = 0x2,
AffineCompact = 0x10 | Affine,
Projective = 0x20
};
const int EiArch_Generic = 0x0;
const int EiArch_SSE = 0x1;
const int EiArch_AltiVec = 0x2;
#if defined EIGEN_VECTORIZE_SSE
const int EiArch = EiArch_SSE;
#elif defined EIGEN_VECTORIZE_ALTIVEC
const int EiArch = EiArch_AltiVec;
#else
const int EiArch = EiArch_Generic;
#endif
#endif // EIGEN_CONSTANTS_H

View File

@@ -45,14 +45,17 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Lhs, typename Rhs, int ProductMode> class Product;
template<typename CoeffsVectorType> class DiagonalMatrix;
template<typename MatrixType> class DiagonalCoeffs;
template<typename CoeffsVectorType, typename Derived> class DiagonalMatrixBase;
template<typename CoeffsVectorType> class DiagonalMatrixWrapper;
template<typename _Scalar, int _Size> class DiagonalMatrix;
template<typename MatrixType, int Index> class Diagonal;
template<typename MatrixType, int PacketAccess = AsRequested> class Map;
template<typename MatrixType, unsigned int Mode> class Part;
template<typename MatrixType, unsigned int Mode> class Extract;
template<typename ExpressionType> class Cwise;
template<typename ExpressionType> class WithFormat;
template<typename MatrixType> struct CommaInitializer;
template<typename Functor, typename EvalType> class ReturnByValue;
template<typename Lhs, typename Rhs> struct ei_product_mode;
@@ -87,6 +90,8 @@ template<typename Scalar> struct ei_scalar_add_op;
template<typename Scalar> struct ei_scalar_constant_op;
template<typename Scalar> struct ei_scalar_identity_op;
template<typename Scalar1,typename Scalar2> struct ei_scalar_multiple2_op;
struct IOFormat;
template<typename Scalar>
@@ -100,8 +105,11 @@ void ei_cache_friendly_product(
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
template<typename ExpressionType, int Direction> class PartialRedux;
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
template<typename MatrixType, int Direction = BothDirections> class Reverse;
template<typename MatrixType> class LU;
template<typename MatrixType> class PartialLU;
template<typename MatrixType> class QR;
template<typename MatrixType> class SVD;
template<typename MatrixType> class LLT;
@@ -113,11 +121,12 @@ template<typename Lhs, typename Rhs> class Cross;
template<typename Scalar> class Quaternion;
template<typename Scalar> class Rotation2D;
template<typename Scalar> class AngleAxis;
template<typename Scalar,int Dim> class Transform;
template<typename Scalar,int Dim,int Mode=Affine> class Transform;
template <typename _Scalar, int _AmbientDim> class ParametrizedLine;
template <typename _Scalar, int _AmbientDim> class Hyperplane;
template<typename Scalar,int Dim> class Translation;
template<typename Scalar,int Dim> class Scaling;
template<typename Scalar> class UniformScaling;
template<typename MatrixType,int Direction> class Homogeneous;
// Sparse module:
template<typename Lhs, typename Rhs, int ProductMode> class SparseProduct;

View File

@@ -30,7 +30,7 @@
#define EIGEN_WORLD_VERSION 2
#define EIGEN_MAJOR_VERSION 0
#define EIGEN_MINOR_VERSION 2
#define EIGEN_MINOR_VERSION 52
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -41,34 +41,41 @@
// because extra memory must be allocated for bookkeeping).
// if the compiler is not GNUC, just cross fingers that the architecture isn't too exotic, because we don't want
// to keep track of all the different preprocessor symbols for all compilers.
#if !defined(__GNUC__) || defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__ia64__)
#if (!defined(__GNUC__)) || defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ia64__)
#define EIGEN_ARCH_WANTS_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_ALIGNMENT 0
#endif
// EIGEN_ALIGN is the true test whether we want to align or not. It takes into account both the user choice to explicitly disable
// alignment (EIGEN_DONT_ALIGN) and the architecture config (EIGEN_ARCH_WANTS_ALIGNMENT). Henceforth, only EIGEN_ALIGN should be used.
#if EIGEN_ARCH_WANTS_ALIGNMENT && !defined(EIGEN_DONT_ALIGN)
#define EIGEN_ALIGN 1
#else
#define EIGEN_ALIGN 0
#ifdef EIGEN_VECTORIZE
#error Vectorization enabled, but the architecture is not listed among those for which we require 16 byte alignment. If you added vectorization for another architecture, you also need to edit this list.
#endif
#define EIGEN_ARCH_WANTS_ALIGNMENT 0
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor
#else
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor
#endif
/** \internal Defines the maximal loop size to enable meta unrolling of loops.
* Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
* it does not correspond to the number of iterations or the number of instructions
/** Defines the maximal loop size to enable meta unrolling of loops.
* Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
* it does not correspond to the number of iterations or the number of instructions
*/
#ifndef EIGEN_UNROLLING_LIMIT
#define EIGEN_UNROLLING_LIMIT 100
#endif
/** \internal Define the maximal size in Bytes of blocks fitting in CPU cache.
/** Defines the maximal size in Bytes of blocks fitting in CPU cache.
* The current value is set to generate blocks of 256x256 for float
*
* Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes
@@ -77,9 +84,13 @@
#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
#endif
// FIXME this should go away quickly
#ifdef EIGEN_TUNE_FOR_L2_CACHE_SIZE
#error EIGEN_TUNE_FOR_L2_CACHE_SIZE is now called EIGEN_TUNE_FOR_CPU_CACHE_SIZE.
/** Allows to disable some optimizations which might affect the accuracy of the result.
* Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
* They currently include:
* - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled.
*/
#ifndef EIGEN_FAST_MATH
#define EIGEN_FAST_MATH 1
#endif
#define USING_PART_OF_NAMESPACE_EIGEN \
@@ -159,13 +170,25 @@ using Eigen::ei_cos;
#define EIGEN_DEPRECATED
#endif
#if (defined __GNUC__)
#define EIGEN_UNUSED __attribute__((unused))
#else
#define EIGEN_UNUSED
#endif
#if (defined __GNUC__)
#define EIGEN_ASM_COMMENT(X) asm("#"X)
#else
#define EIGEN_ASM_COMMENT(X)
#endif
/* EIGEN_ALIGN_128 forces data to be 16-byte aligned, EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
* so that vectorization doesn't affect binary compatibility.
*
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
* vectorized and non-vectorized code.
*/
#if !EIGEN_ARCH_WANTS_ALIGNMENT
#if !EIGEN_ALIGN
#define EIGEN_ALIGN_128
#elif (defined __GNUC__)
#define EIGEN_ALIGN_128 __attribute__((aligned(16)))
@@ -178,13 +201,24 @@ using Eigen::ei_cos;
#define EIGEN_RESTRICT __restrict
#ifndef EIGEN_STACK_ALLOCATION_LIMIT
#define EIGEN_STACK_ALLOCATION_LIMIT 16000000
#define EIGEN_STACK_ALLOCATION_LIMIT 1000000
#endif
#ifndef EIGEN_DEFAULT_IO_FORMAT
#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
#endif
// just an empty macro !
#define EIGEN_EMPTY
// concatenate two tokens
#define EIGEN_CAT2(a,b) a ## b
#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
// convert a token to a string
#define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
// format used in Eigen's documentation
// needed to define it here as escaping characters in CMake add_definition's argument seems very problematic.
#define EIGEN_DOCS_IO_FORMAT IOFormat(3, AlignCols, " ", "\n", "", "")
@@ -219,6 +253,7 @@ typedef BaseClass Base; \
typedef typename Eigen::ei_traits<Derived>::Scalar Scalar; \
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; \
typedef typename Base::PacketScalar PacketScalar; \
typedef typename Base::CoeffReturnType CoeffReturnType; \
typedef typename Eigen::ei_nested<Derived>::type Nested; \
enum { RowsAtCompileTime = Eigen::ei_traits<Derived>::RowsAtCompileTime, \
ColsAtCompileTime = Eigen::ei_traits<Derived>::ColsAtCompileTime, \
@@ -236,15 +271,4 @@ _EIGEN_GENERIC_PUBLIC_INTERFACE(Derived, Eigen::MatrixBase<Derived>)
#define EIGEN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
#define EIGEN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
// just an empty macro !
#define EIGEN_EMPTY
// concatenate two tokens
#define EIGEN_CAT2(a,b) a ## b
#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
// convert a token to a string
#define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
#endif // EIGEN_MACROS_H

View File

@@ -65,7 +65,7 @@ inline void ei_handmade_aligned_free(void *ptr)
}
/** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
* On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
* On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
*/
inline void* ei_aligned_malloc(size_t size)
{
@@ -73,37 +73,30 @@ inline void* ei_aligned_malloc(size_t size)
ei_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
#endif
void *result;
#if EIGEN_HAS_POSIX_MEMALIGN && EIGEN_ARCH_WANTS_ALIGNMENT && !EIGEN_MALLOC_ALREADY_ALIGNED
#ifdef EIGEN_EXCEPTIONS
const int failed =
#endif
posix_memalign(&result, 16, size);
void *result;
#if !EIGEN_ALIGN
result = malloc(size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = malloc(size);
#elif EIGEN_HAS_POSIX_MEMALIGN
if(posix_memalign(&result, 16, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, 16);
#elif (defined _MSC_VER)
result = _aligned_malloc(size, 16);
#else
#if !EIGEN_ARCH_WANTS_ALIGNMENT
result = malloc(size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = malloc(size);
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, 16);
#elif (defined _MSC_VER)
result = _aligned_malloc(size, 16);
#else
result = ei_handmade_aligned_malloc(size);
#endif
#ifdef EIGEN_EXCEPTIONS
const int failed = (result == 0);
#endif
result = ei_handmade_aligned_malloc(size);
#endif
#ifdef EIGEN_EXCEPTIONS
if(failed)
if(result == 0)
throw std::bad_alloc();
#endif
return result;
}
/** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
* On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
* On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
*/
template<bool Align> inline void* ei_conditional_aligned_malloc(size_t size)
{
@@ -143,7 +136,7 @@ template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t siz
*/
inline void ei_aligned_free(void *ptr)
{
#if !EIGEN_ARCH_WANTS_ALIGNMENT
#if !EIGEN_ALIGN
free(ptr);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
free(ptr);
@@ -237,17 +230,40 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)
#if EIGEN_ARCH_WANTS_ALIGNMENT
#if EIGEN_ALIGN
#ifdef EIGEN_EXCEPTIONS
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
void* operator new(size_t size, const std::nothrow_t&) throw() { \
try { return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); } \
catch (...) { return 0; } \
return 0; \
}
#else
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
void* operator new(size_t size, const std::nothrow_t&) throw() { \
return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
}
#endif
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
void *operator new(size_t size) throw() { \
void *operator new(size_t size) { \
return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
} \
void *operator new[](size_t size) throw() { \
void *operator new[](size_t size) { \
return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
} \
void operator delete(void * ptr) { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
void operator delete[](void * ptr) { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
void *operator new(size_t, void *ptr) throw() { return ptr; } \
void operator delete(void * ptr) throw() { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
void operator delete[](void * ptr) throw() { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
/* in-place new and delete. since (at least afaik) there is no actual */ \
/* memory allocated we can safely let the default implementation handle */ \
/* this particular case. */ \
static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
void operator delete(void *ptr, const std::nothrow_t&) throw() { \
Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); \
} \
typedef void ei_operator_new_marker_type;
#else
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
@@ -289,34 +305,34 @@ public:
typedef aligned_allocator<U> other;
};
pointer address( reference value ) const
pointer address( reference value ) const
{
return &value;
}
const_pointer address( const_reference value ) const
const_pointer address( const_reference value ) const
{
return &value;
}
aligned_allocator() throw()
aligned_allocator() throw()
{
}
aligned_allocator( const aligned_allocator& ) throw()
aligned_allocator( const aligned_allocator& ) throw()
{
}
template<class U>
aligned_allocator( const aligned_allocator<U>& ) throw()
aligned_allocator( const aligned_allocator<U>& ) throw()
{
}
~aligned_allocator() throw()
~aligned_allocator() throw()
{
}
size_type max_size() const throw()
size_type max_size() const throw()
{
return std::numeric_limits<size_type>::max();
}
@@ -327,17 +343,17 @@ public:
return static_cast<pointer>( ei_aligned_malloc( num * sizeof(T) ) );
}
void construct( pointer p, const T& value )
void construct( pointer p, const T& value )
{
::new( p ) T( value );
}
void destroy( pointer p )
void destroy( pointer p )
{
p->~T();
}
void deallocate( pointer p, size_type /*num*/ )
void deallocate( pointer p, size_type /*num*/ )
{
ei_aligned_free( p );
}

View File

@@ -64,6 +64,14 @@ template<typename T> struct ei_cleantype<T&> { typedef typename ei_cleant
template<typename T> struct ei_cleantype<const T*> { typedef typename ei_cleantype<T>::type type; };
template<typename T> struct ei_cleantype<T*> { typedef typename ei_cleantype<T>::type type; };
/** \internal Allows to enable/disable an overload
* according to a compile time condition.
*/
template<bool Condition, typename T> struct ei_enable_if;
template<typename T> struct ei_enable_if<true,T>
{ typedef T type; };
/** \internal
* Convenient struct to get the result type of a unary or binary functor.
*

View File

@@ -73,7 +73,9 @@
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
INVALID_MATRIX_TEMPLATE_PARAMETERS
INVALID_MATRIX_TEMPLATE_PARAMETERS,
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX
};
};

View File

@@ -73,11 +73,7 @@ template<typename T> struct ei_functor_traits
};
};
template<typename T> struct ei_packet_traits
{
typedef T type;
enum {size=1};
};
template<typename T> struct ei_packet_traits;
template<typename T> struct ei_unpacket_traits
{
@@ -93,7 +89,7 @@ class ei_compute_matrix_flags
inner_max_size = row_major_bit ? MaxCols : MaxRows,
is_big = inner_max_size == Dynamic,
is_packet_size_multiple = (Cols*Rows) % ei_packet_traits<Scalar>::size == 0,
aligned_bit = ((Options&AutoAlign) && (is_big || is_packet_size_multiple)) ? AlignedBit : 0,
aligned_bit = (((Options&DontAlign)==0) && (is_big || is_packet_size_multiple)) ? AlignedBit : 0,
packet_access_bit = ei_packet_traits<Scalar>::size > 1 && aligned_bit ? PacketAccessBit : 0
};
@@ -202,6 +198,28 @@ template<unsigned int Flags> struct ei_are_flags_consistent
};
};
/** \internal Helper base class to add a scalar multiple operator
* overloads for complex types */
template<typename Derived,typename Scalar,typename OtherScalar,
bool EnableIt = !ei_is_same_type<Scalar,OtherScalar>::ret >
struct ei_special_scalar_op_base
{
// dummy operator* so that the
// "using ei_special_scalar_op_base::operator*" compiles
void operator*() const;
};
template<typename Derived,typename Scalar,typename OtherScalar>
struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true>
{
const CwiseUnaryOp<ei_scalar_multiple2_op<Scalar,OtherScalar>, Derived>
operator*(const OtherScalar& scalar) const
{
return CwiseUnaryOp<ei_scalar_multiple2_op<Scalar,OtherScalar>, Derived>
(*static_cast<const Derived*>(this), ei_scalar_multiple2_op<Scalar,OtherScalar>(scalar));
}
};
/** \internal Gives the type of a sub-matrix or sub-vector of a matrix of type \a ExpressionType and size \a Size
* TODO: could be a good idea to define a big ReturnType struct ??
*/
@@ -211,9 +229,26 @@ template<typename ExpressionType, int RowsOrSize=Dynamic, int Cols=Dynamic> stru
typedef Block<ExpressionType, RowsOrSize, Cols> Type;
};
template<typename CurrentType, typename NewType> struct ei_cast_return_type
template<typename ExpressionType> struct HNormalizedReturnType {
enum {
SizeAtCompileTime = ExpressionType::SizeAtCompileTime,
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
};
typedef Block<ExpressionType,
ei_traits<ExpressionType>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
ei_traits<ExpressionType>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> StartMinusOne;
typedef CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<ExpressionType>::Scalar>,
NestByValue<StartMinusOne> > Type;
};
template<typename XprType, typename CastType> struct ei_cast_return_type
{
typedef typename ei_meta_if<ei_is_same_type<CurrentType,NewType>::ret,const CurrentType&,NewType>::ret type;
typedef typename XprType::Scalar CurrentScalarType;
typedef typename ei_cleantype<CastType>::type _CastType;
typedef typename _CastType::Scalar NewScalarType;
typedef typename ei_meta_if<ei_is_same_type<CurrentScalarType,NewScalarType>::ret,
const XprType&,CastType>::ret type;
};
#endif // EIGEN_XPRHELPER_H

View File

@@ -41,7 +41,7 @@ template <typename _Scalar, int _AmbientDim>
class AlignedBox
{
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1)
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
enum { AmbientDimAtCompileTime = _AmbientDim };
typedef _Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -85,6 +85,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==
/** \returns a non const reference to the maximal corner */
inline VectorType& max() { return m_max; }
/** \returns the center of the box */
inline VectorType center() const { return (m_min + m_max) / 2; }
/** \returns true if the point \a p is inside the box \c *this. */
inline bool contains(const VectorType& p) const
{ return (m_min.cwise()<=p).all() && (p.cwise()<=m_max).all(); }
@@ -105,6 +108,14 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==
inline AlignedBox& clamp(const AlignedBox& b)
{ m_min = m_min.cwise().max(b.m_min); m_max = m_max.cwise().min(b.m_max); return *this; }
/** Returns an AlignedBox that is the intersection of \a b and \c *this */
inline AlignedBox intersection(const AlignedBox &b) const
{ return AlignedBox(m_min.cwise().max(b.m_min), m_max.cwise().min(b.m_max)); }
/** Returns an AlignedBox that is the union of \a b and \c *this */
inline AlignedBox merged(const AlignedBox &b) const
{ return AlignedBox(m_min.cwise().min(b.m_min), m_max.cwise().max(b.m_max)); }
/** Translate \c *this by the vector \a t and returns a reference to \c *this. */
inline AlignedBox& translate(const VectorType& t)
{ m_min += t; m_max += t; return *this; }
@@ -115,6 +126,12 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==
*/
inline Scalar squaredExteriorDistance(const VectorType& p) const;
/** \returns the squared distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
* \sa exteriorDistance()
*/
inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
/** \returns the distance between the point \a p and the box \c *this,
* and zero if \a p is inside the box.
* \sa squaredExteriorDistance()
@@ -122,6 +139,13 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==
inline Scalar exteriorDistance(const VectorType& p) const
{ return ei_sqrt(squaredExteriorDistance(p)); }
/** \returns the distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
* \sa squaredExteriorDistance()
*/
inline Scalar exteriorDistance(const AlignedBox& b) const
{ return ei_sqrt(squaredExteriorDistance(b)); }
/** \returns \c *this with scalar type casted to \a NewScalarType
*
* Note that if \a NewScalarType is equal to the current scalar type of \c *this
@@ -170,4 +194,19 @@ inline Scalar AlignedBox<Scalar,AmbiantDim>::squaredExteriorDistance(const Vecto
return dist2;
}
template<typename Scalar,int AmbiantDim>
inline Scalar AlignedBox<Scalar,AmbiantDim>::squaredExteriorDistance(const AlignedBox& b) const
{
Scalar dist2 = 0.;
Scalar aux;
for (int k=0; k<dim(); ++k)
{
if ((aux = (b.m_min[k]-m_max[k]))>0.)
dist2 += aux*aux;
else if ( (aux = (m_min[k]-b.m_max[k]))>0. )
dist2 += aux*aux;
}
return dist2;
}
#endif // EIGEN_ALIGNEDBOX_H

View File

@@ -33,6 +33,8 @@
*
* \param _Scalar the scalar type, i.e., the type of the coefficients.
*
* \warning When setting up an AngleAxis object, the axis vector \b must \b be \b normalized.
*
* The following two typedefs are provided for convenience:
* \li \c AngleAxisf for \c float
* \li \c AngleAxisd for \c double
@@ -82,7 +84,10 @@ public:
/** Default constructor without initialization. */
AngleAxis() {}
/** Constructs and initialize the angle-axis rotation from an \a angle in radian
* and an \a axis which must be normalized. */
* and an \a axis which \b must \b be \b normalized.
*
* \warning If the \a axis vector is not normalized, then the angle-axis object
* represents an invalid rotation. */
template<typename Derived>
inline AngleAxis(Scalar angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}
/** Constructs and initialize the angle-axis rotation from a quaternion \a q. */
@@ -109,18 +114,6 @@ public:
friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
{ return a * QuaternionType(b); }
/** Concatenates two rotations */
inline Matrix3 operator* (const Matrix3& other) const
{ return toRotationMatrix() * other; }
/** Concatenates two rotations */
inline friend Matrix3 operator* (const Matrix3& a, const AngleAxis& b)
{ return a * b.toRotationMatrix(); }
/** Applies rotation to vector */
inline Vector3 operator* (const Vector3& other) const
{ return toRotationMatrix() * other; }
/** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */
AngleAxis inverse() const
{ return AngleAxis(-m_angle, m_axis); }

View File

@@ -2,5 +2,7 @@ FILE(GLOB Eigen_Geometry_SRCS "*.h")
INSTALL(FILES
${Eigen_Geometry_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry COMPONENT Devel
)
ADD_SUBDIRECTORY(arch)

View File

@@ -0,0 +1,260 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_HOMOGENEOUS_H
#define EIGEN_HOMOGENEOUS_H
/** \geometry_module \ingroup Geometry_Module
* \nonstableyet
* \class Homogeneous
*
* \brief Expression of one (or a set of) homogeneous vector(s)
*
* \param MatrixType the type of the object in which we are making homogeneous
*
* This class represents an expression of one (or a set of) homogeneous vector(s).
* It is the return type of MatrixBase::homogeneous() and most of the time
* this is the only way it is used.
*
* \sa MatrixBase::homogeneous()
*/
template<typename MatrixType,int Direction>
struct ei_traits<Homogeneous<MatrixType,Direction> >
{
typedef typename MatrixType::Scalar Scalar;
typedef typename ei_nested<MatrixType>::type MatrixTypeNested;
typedef typename ei_unref<MatrixTypeNested>::type _MatrixTypeNested;
enum {
RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ?
int(MatrixType::RowsAtCompileTime) + 1 : Dynamic,
ColsPlusOne = (MatrixType::ColsAtCompileTime != Dynamic) ?
int(MatrixType::ColsAtCompileTime) + 1 : Dynamic,
RowsAtCompileTime = Direction==Vertical ? RowsPlusOne : MatrixType::RowsAtCompileTime,
ColsAtCompileTime = Direction==Horizontal ? ColsPlusOne : MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = RowsAtCompileTime,
MaxColsAtCompileTime = ColsAtCompileTime,
Flags = _MatrixTypeNested::Flags & HereditaryBits,
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
};
};
template<typename MatrixType,typename Lhs> struct ei_homogeneous_left_product_impl;
template<typename MatrixType,typename Rhs> struct ei_homogeneous_right_product_impl;
template<typename MatrixType,int Direction> class Homogeneous
: public MatrixBase<Homogeneous<MatrixType,Direction> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Homogeneous)
inline Homogeneous(const MatrixType& matrix)
: m_matrix(matrix)
{}
inline int rows() const { return m_matrix.rows() + (Direction==Vertical ? 1 : 0); }
inline int cols() const { return m_matrix.cols() + (Direction==Horizontal ? 1 : 0); }
inline Scalar coeff(int row, int col) const
{
if( (Direction==Vertical && row==m_matrix.rows())
|| (Direction==Horizontal && col==m_matrix.cols()))
return 1;
return m_matrix.coeff(row, col);
}
template<typename Rhs>
inline const ei_homogeneous_right_product_impl<Homogeneous,Rhs>
operator* (const MatrixBase<Rhs>& rhs) const
{
ei_assert(Direction==Horizontal);
return ei_homogeneous_right_product_impl<Homogeneous,Rhs>(m_matrix,rhs.derived());
}
template<typename Lhs> friend
inline const ei_homogeneous_left_product_impl<Homogeneous,Lhs>
operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
{
ei_assert(Direction==Vertical);
return ei_homogeneous_left_product_impl<Homogeneous,Lhs>(lhs.derived(),rhs.m_matrix);
}
template<typename Scalar, int Dim, int Mode> friend
inline const ei_homogeneous_left_product_impl<Homogeneous,
typename Transform<Scalar,Dim,Mode>::AffinePart>
operator* (const Transform<Scalar,Dim,Mode>& tr, const Homogeneous& rhs)
{
ei_assert(Direction==Vertical);
return ei_homogeneous_left_product_impl<Homogeneous,typename Transform<Scalar,Dim,Mode>::AffinePart>
(tr.affine(),rhs.m_matrix);
}
template<typename Scalar, int Dim> friend
inline const ei_homogeneous_left_product_impl<Homogeneous,
typename Transform<Scalar,Dim,Projective>::MatrixType>
operator* (const Transform<Scalar,Dim,Projective>& tr, const Homogeneous& rhs)
{
ei_assert(Direction==Vertical);
return ei_homogeneous_left_product_impl<Homogeneous,typename Transform<Scalar,Dim,Projective>::MatrixType>
(tr.matrix(),rhs.m_matrix);
}
protected:
const typename MatrixType::Nested m_matrix;
};
/** \geometry_module
* \nonstableyet
* \return an expression of the equivalent homogeneous vector
*
* \vectoronly
*
* Example: \include MatrixBase_homogeneous.cpp
* Output: \verbinclude MatrixBase_homogeneous.out
*
* \sa class Homogeneous
*/
template<typename Derived>
inline const typename MatrixBase<Derived>::HomogeneousReturnType
MatrixBase<Derived>::homogeneous() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
return derived();
}
/** \geometry_module
* \nonstableyet
* \returns a matrix expression of homogeneous column (or row) vectors
*
* Example: \include PartialRedux_homogeneous.cpp
* Output: \verbinclude PartialRedux_homogeneous.out
*
* \sa MatrixBase::homogeneous() */
template<typename ExpressionType, int Direction>
inline const Homogeneous<ExpressionType,Direction>
PartialRedux<ExpressionType,Direction>::homogeneous() const
{
return _expression();
}
/** \geometry_module
* \nonstableyet
* \returns an expression of the homogeneous normalized vector of \c *this
*
* Example: \include MatrixBase_hnormalized.cpp
* Output: \verbinclude MatrixBase_hnormalized.out
*
* \sa PartialRedux::hnormalized() */
template<typename Derived>
inline const typename MatrixBase<Derived>::HNormalizedReturnType
MatrixBase<Derived>::hnormalized() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
return StartMinusOne(derived(),0,0,
ColsAtCompileTime==1?size()-1:1,
ColsAtCompileTime==1?1:size()-1).nestByValue() / coeff(size()-1);
}
/** \geometry_module
* \nonstableyet
* \returns an expression of the homogeneous normalized vector of \c *this
*
* Example: \include DirectionWise_hnormalized.cpp
* Output: \verbinclude DirectionWise_hnormalized.out
*
* \sa MatrixBase::hnormalized() */
template<typename ExpressionType, int Direction>
inline const typename PartialRedux<ExpressionType,Direction>::HNormalizedReturnType
PartialRedux<ExpressionType,Direction>::hnormalized() const
{
return HNormalized_Block(_expression(),0,0,
Direction==Vertical ? _expression().rows()-1 : _expression().rows(),
Direction==Horizontal ? _expression().cols()-1 : _expression().cols()).nestByValue()
.cwise()/
Replicate<NestByValue<HNormalized_Factors>,
Direction==Vertical ? HNormalized_SizeMinusOne : 1,
Direction==Horizontal ? HNormalized_SizeMinusOne : 1>
(HNormalized_Factors(_expression(),
Direction==Vertical ? _expression().rows()-1:0,
Direction==Horizontal ? _expression().cols()-1:0,
Direction==Vertical ? 1 : _expression().rows(),
Direction==Horizontal ? 1 : _expression().cols()).nestByValue(),
Direction==Vertical ? _expression().rows()-1 : 1,
Direction==Horizontal ? _expression().cols()-1 : 1).nestByValue();
}
template<typename MatrixType,typename Lhs>
struct ei_homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>
: public ReturnByValue<ei_homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>,
Matrix<typename ei_traits<MatrixType>::Scalar,
Lhs::RowsAtCompileTime,MatrixType::ColsAtCompileTime> >
{
typedef typename ei_cleantype<typename Lhs::Nested>::type LhsNested;
ei_homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
: m_lhs(lhs), m_rhs(rhs)
{}
template<typename Dest> void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = Block<LhsNested,
LhsNested::RowsAtCompileTime,
LhsNested::ColsAtCompileTime==Dynamic?Dynamic:LhsNested::ColsAtCompileTime-1>
(m_lhs,0,0,m_lhs.rows(),m_lhs.cols()-1) * m_rhs;
dst += m_lhs.col(m_lhs.cols()-1).rowwise()
.template replicate<MatrixType::ColsAtCompileTime>(m_rhs.cols());
}
const typename Lhs::Nested m_lhs;
const typename MatrixType::Nested m_rhs;
};
template<typename MatrixType,typename Rhs>
struct ei_homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
: public ReturnByValue<ei_homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>,
Matrix<typename ei_traits<MatrixType>::Scalar,
MatrixType::RowsAtCompileTime, Rhs::ColsAtCompileTime> >
{
typedef typename ei_cleantype<typename Rhs::Nested>::type RhsNested;
ei_homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{}
template<typename Dest> void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = m_lhs * Block<RhsNested,
RhsNested::RowsAtCompileTime==Dynamic?Dynamic:RhsNested::RowsAtCompileTime-1,
RhsNested::ColsAtCompileTime>
(m_rhs,0,0,m_rhs.rows()-1,m_rhs.cols());
dst += m_rhs.row(m_rhs.rows()-1).colwise()
.template replicate<MatrixType::RowsAtCompileTime>(m_lhs.rows());
}
const typename MatrixType::Nested m_lhs;
const typename Rhs::Nested m_rhs;
};
#endif // EIGEN_HOMOGENEOUS_H

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
@@ -31,6 +31,7 @@
* \returns the cross product of \c *this and \a other
*
* Here is a very good explanation of cross-product: http://xkcd.com/199/
* \sa MatrixBase::cross3()
*/
template<typename Derived>
template<typename OtherDerived>
@@ -38,6 +39,7 @@ inline typename MatrixBase<Derived>::PlainMatrixType
MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
// Note that there is no need for an expression here since the compiler
// optimize such a small temporary very well (even within a complex expression)
@@ -50,15 +52,116 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
);
}
template< int Arch,typename VectorLhs,typename VectorRhs,
typename Scalar = typename VectorLhs::Scalar,
int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
struct ei_cross3_impl {
inline static typename ei_plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
return typename ei_plain_matrix_type<VectorLhs>::type(
lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1),
lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2),
lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0),
0
);
}
};
/** \geometry_module
*
* \returns the cross product of \c *this and \a other using only the x, y, and z coefficients
*
* The size of \c *this and \a other must be four. This function is especially useful
* when using 4D vectors instead of 3D ones to get advantage of SSE/AltiVec vectorization.
*
* \sa MatrixBase::cross()
*/
template<typename Derived>
template<typename OtherDerived>
inline typename MatrixBase<Derived>::PlainMatrixType
MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4)
typedef typename ei_nested<Derived,2>::type DerivedNested;
typedef typename ei_nested<OtherDerived,2>::type OtherDerivedNested;
const DerivedNested lhs(derived());
const OtherDerivedNested rhs(other.derived());
return ei_cross3_impl<EiArch,typename ei_cleantype<DerivedNested>::type,
typename ei_cleantype<OtherDerivedNested>::type>::run(lhs,rhs);
}
/** \returns a matrix expression of the cross product of each column or row
* of the referenced expression with the \a other vector.
*
* The referenced matrix must have one dimension equal to 3.
* The result matrix has the same dimensions than the referenced one.
*
* \geometry_module
*
* \sa MatrixBase::cross() */
template<typename ExpressionType, int Direction>
template<typename OtherDerived>
const typename PartialRedux<ExpressionType,Direction>::CrossReturnType
PartialRedux<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
CrossReturnType res(_expression().rows(),_expression().cols());
if(Direction==Vertical)
{
ei_assert(CrossReturnType::RowsAtCompileTime==3 && "the matrix must have exactly 3 rows");
res.row(0) = _expression().row(1) * other.coeff(2) - _expression().row(2) * other.coeff(1);
res.row(1) = _expression().row(2) * other.coeff(0) - _expression().row(0) * other.coeff(2);
res.row(2) = _expression().row(0) * other.coeff(1) - _expression().row(1) * other.coeff(0);
}
else
{
ei_assert(CrossReturnType::ColsAtCompileTime==3 && "the matrix must have exactly 3 columns");
res.col(0) = _expression().col(1) * other.coeff(2) - _expression().col(2) * other.coeff(1);
res.col(1) = _expression().col(2) * other.coeff(0) - _expression().col(0) * other.coeff(2);
res.col(2) = _expression().col(0) * other.coeff(1) - _expression().col(1) * other.coeff(0);
}
return res;
}
template<typename Derived, int Size = Derived::SizeAtCompileTime>
struct ei_unitOrthogonal_selector
{
typedef typename ei_plain_matrix_type<Derived>::type VectorType;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar,2,1> Vector2;
inline static VectorType run(const Derived& src)
{
VectorType perp(src.size());
VectorType perp = VectorType::Zero(src.size());
int maxi = 0;
int sndi = 0;
src.cwise().abs().maxCoeff(&maxi);
if (maxi==0)
sndi = 1;
RealScalar invnm = RealScalar(1)/(Vector2() << src.coeff(sndi),src.coeff(maxi)).finished().norm();
perp.coeffRef(maxi) = -ei_conj(src.coeff(sndi)) * invnm;
perp.coeffRef(sndi) = ei_conj(src.coeff(maxi)) * invnm;
return perp;
}
};
template<typename Derived>
struct ei_unitOrthogonal_selector<Derived,3>
{
typedef typename ei_plain_matrix_type<Derived>::type VectorType;
typedef typename ei_traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
inline static VectorType run(const Derived& src)
{
VectorType perp;
/* Let us compute the crossed product of *this with a vector
* that is not too close to being colinear to *this.
*/
@@ -85,9 +188,6 @@ struct ei_unitOrthogonal_selector
perp.coeffRef(1) = -ei_conj(src.z())*invnm;
perp.coeffRef(2) = ei_conj(src.y())*invnm;
}
if( (Derived::SizeAtCompileTime!=Dynamic && Derived::SizeAtCompileTime>3)
|| (Derived::SizeAtCompileTime==Dynamic && src.size()>3) )
perp.end(src.size()-3).setZero();
return perp;
}

View File

@@ -62,6 +62,8 @@ class Quaternion : public RotationBase<Quaternion<_Scalar>,3>
{
typedef RotationBase<Quaternion<_Scalar>,3> Base;
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,4)
@@ -189,9 +191,6 @@ public:
Quaternion slerp(Scalar t, const Quaternion& other) const;
template<typename Derived>
Vector3 operator* (const MatrixBase<Derived>& vec) const;
/** \returns \c *this with scalar type casted to \a NewScalarType
*
* Note that if \a NewScalarType is equal to the current scalar type of \c *this
@@ -213,6 +212,8 @@ public:
bool isApprox(const Quaternion& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
Vector3 _transformVector(Vector3 v) const;
protected:
Coefficients m_coeffs;
};
@@ -224,17 +225,24 @@ typedef Quaternion<float> Quaternionf;
* double precision quaternion type */
typedef Quaternion<double> Quaterniond;
// Generic Quaternion * Quaternion product
template<int Arch,typename Scalar> inline Quaternion<Scalar>
ei_quaternion_product(const Quaternion<Scalar>& a, const Quaternion<Scalar>& b)
{
return Quaternion<Scalar>
(
a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(),
a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(),
a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x()
);
}
/** \returns the concatenation of two rotations as a quaternion-quaternion product */
template <typename Scalar>
inline Quaternion<Scalar> Quaternion<Scalar>::operator* (const Quaternion& other) const
{
return Quaternion
(
this->w() * other.w() - this->x() * other.x() - this->y() * other.y() - this->z() * other.z(),
this->w() * other.x() + this->x() * other.w() + this->y() * other.z() - this->z() * other.y(),
this->w() * other.y() + this->y() * other.w() + this->z() * other.x() - this->x() * other.z(),
this->w() * other.z() + this->z() * other.w() + this->x() * other.y() - this->y() * other.x()
);
return ei_quaternion_product<EiArch>(*this,other);
}
/** \sa operator*(Quaternion) */
@@ -252,17 +260,15 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::operator*= (const Quaternion& oth
* - Via a Matrix3: 24 + 15n
*/
template <typename Scalar>
template<typename Derived>
inline typename Quaternion<Scalar>::Vector3
Quaternion<Scalar>::operator* (const MatrixBase<Derived>& v) const
Quaternion<Scalar>::_transformVector(Vector3 v) const
{
// Note that this algorithm comes from the optimization by hand
// of the conversion to a Matrix followed by a Matrix/Vector product.
// It appears to be much faster than the common algorithm found
// in the litterature (30 versus 39 flops). It also requires two
// Vector3 as temporaries.
Vector3 uv;
uv = 2 * this->vec().cross(v);
Vector3 uv = Scalar(2) * this->vec().cross(v);
return v + this->w() * uv + this->vec().cross(uv);
}
@@ -346,7 +352,6 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::setFromTwoVectors(const MatrixBas
{
Vector3 v0 = a.normalized();
Vector3 v1 = b.normalized();
Vector3 axis = v0.cross(v1);
Scalar c = v0.dot(v1);
// if dot == 1, vectors are the same
@@ -354,7 +359,17 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::setFromTwoVectors(const MatrixBas
{
// set to identity
this->w() = 1; this->vec().setZero();
return *this;
}
// if dot == -1, vectors are opposites
if (ei_isApprox(c,Scalar(-1)))
{
this->vec() = v0.unitOrthogonal();
this->w() = 0;
return *this;
}
Vector3 axis = v0.cross(v1);
Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2));
Scalar invs = Scalar(1)/s;
this->vec() = axis * invs;

View File

@@ -42,10 +42,31 @@ class RotationBase
enum { Dim = _Dim };
/** the scalar type of the coefficients */
typedef typename ei_traits<Derived>::Scalar Scalar;
/** corresponding linear transformation matrix type */
typedef Matrix<Scalar,Dim,Dim> RotationMatrixType;
typedef Matrix<Scalar,Dim,1> VectorType;
protected:
template<typename MatrixType, bool IsVector=MatrixType::IsVectorAtCompileTime>
struct generic_product_selector
{
typedef RotationMatrixType ReturnType;
inline static RotationMatrixType run(const Derived& r, const MatrixType& m)
{ return r.toRotationMatrix() * m; }
};
template<typename OtherVectorType>
struct generic_product_selector<OtherVectorType,true>
{
typedef VectorType ReturnType;
inline static VectorType run(const Derived& r, const OtherVectorType& v)
{
return r._transformVector(v);
}
};
public:
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
inline Derived& derived() { return *static_cast<Derived*>(this); }
@@ -59,13 +80,33 @@ class RotationBase
inline Transform<Scalar,Dim> operator*(const Translation<Scalar,Dim>& t) const
{ return toRotationMatrix() * t; }
/** \returns the concatenation of the rotation \c *this with a scaling \a s */
inline RotationMatrixType operator*(const Scaling<Scalar,Dim>& s) const
{ return toRotationMatrix() * s; }
/** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
{ return toRotationMatrix() * s.factor(); }
/** \returns the concatenation of the rotation \c *this with an affine transformation \a t */
inline Transform<Scalar,Dim> operator*(const Transform<Scalar,Dim>& t) const
/** \returns the concatenation of the rotation \c *this with a generic expression \a e
* \a e can be:
* - a DimxDim linear transformation matrix (including an axis aligned scaling)
* - a vector of size Dim
*/
template<typename OtherDerived>
inline typename generic_product_selector<OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
operator*(const MatrixBase<OtherDerived>& e) const
{ return generic_product_selector<OtherDerived>::run(derived(), e.derived()); }
/** \returns the concatenation of a linear transformation \a l with the rotation \a r */
template<typename OtherDerived> friend
inline RotationMatrixType operator*(const MatrixBase<OtherDerived>& l, const Derived& r)
{ return l.derived() * r.toRotationMatrix(); }
/** \returns the concatenation of the rotation \c *this with a transformation \a t */
template<int Mode>
inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode>& t) const
{ return toRotationMatrix() * t; }
template<typename OtherVectorType>
inline VectorType _transformVector(const OtherVectorType& v) const
{ return toRotationMatrix() * v; }
};
/** \geometry_module

View File

@@ -29,102 +29,65 @@
*
* \class Scaling
*
* \brief Represents a possibly non uniform scaling transformation
* \brief Represents a generic uniform scaling transformation
*
* \param _Scalar the scalar type, i.e., the type of the coefficients.
* \param _Dim the dimension of the space, can be a compile time value or Dynamic
*
* \note This class is not aimed to be used to store a scaling transformation,
* This class represent a uniform scaling transformation. It is the return
* type of Scaling(Scalar), and most of the time this is the only way it
* is used. In particular, this class is not aimed to be used to store a scaling transformation,
* but rather to make easier the constructions and updates of Transform objects.
*
* \sa class Translation, class Transform
* To represent an axis aligned scaling, use the DiagonalMatrix class.
*
* \sa Scaling(), class DiagonalMatrix, MatrixBase::asDiagonal(), class Translation, class Transform
*/
template<typename _Scalar, int _Dim>
class Scaling
template<typename _Scalar>
class UniformScaling
{
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim)
/** dimension of the space */
enum { Dim = _Dim };
/** the scalar type of the coefficients */
typedef _Scalar Scalar;
/** corresponding vector type */
typedef Matrix<Scalar,Dim,1> VectorType;
/** corresponding linear transformation matrix type */
typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
/** corresponding translation type */
typedef Translation<Scalar,Dim> TranslationType;
/** corresponding affine transformation type */
typedef Transform<Scalar,Dim> TransformType;
protected:
VectorType m_coeffs;
Scalar m_factor;
public:
/** Default constructor without initialization. */
Scaling() {}
UniformScaling() {}
/** Constructs and initialize a uniform scaling transformation */
explicit inline Scaling(const Scalar& s) { m_coeffs.setConstant(s); }
/** 2D only */
inline Scaling(const Scalar& sx, const Scalar& sy)
{
ei_assert(Dim==2);
m_coeffs.x() = sx;
m_coeffs.y() = sy;
}
/** 3D only */
inline Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)
{
ei_assert(Dim==3);
m_coeffs.x() = sx;
m_coeffs.y() = sy;
m_coeffs.z() = sz;
}
/** Constructs and initialize the scaling transformation from a vector of scaling coefficients */
explicit inline Scaling(const VectorType& coeffs) : m_coeffs(coeffs) {}
explicit inline UniformScaling(const Scalar& s) : m_factor(s) {}
const VectorType& coeffs() const { return m_coeffs; }
VectorType& coeffs() { return m_coeffs; }
inline const Scalar& factor() const { return m_factor; }
inline Scalar& factor() { return m_factor; }
/** Concatenates two scaling */
inline Scaling operator* (const Scaling& other) const
{ return Scaling(coeffs().cwise() * other.coeffs()); }
/** Concatenates two uniform scaling */
inline UniformScaling operator* (const UniformScaling& other) const
{ return UniformScaling(m_factor * other.factor()); }
/** Concatenates a scaling and a translation */
inline TransformType operator* (const TranslationType& t) const;
/** Concatenates a uniform scaling and a translation */
template<int Dim>
inline Transform<Scalar,Dim> operator* (const Translation<Scalar,Dim>& t) const;
/** Concatenates a scaling and an affine transformation */
inline TransformType operator* (const TransformType& t) const;
/** Concatenates a uniform scaling and an affine transformation */
template<int Dim>
inline Transform<Scalar,Dim> operator* (const Transform<Scalar,Dim>& t) const;
/** Concatenates a scaling and a linear transformation matrix */
/** Concatenates a uniform scaling and a linear transformation matrix */
// TODO returns an expression
inline LinearMatrixType operator* (const LinearMatrixType& other) const
{ return coeffs().asDiagonal() * other; }
/** Concatenates a linear transformation matrix and a scaling */
// TODO returns an expression
friend inline LinearMatrixType operator* (const LinearMatrixType& other, const Scaling& s)
{ return other * s.coeffs().asDiagonal(); }
template<typename Derived>
inline LinearMatrixType operator*(const RotationBase<Derived,Dim>& r) const
{ return *this * r.toRotationMatrix(); }
inline typename ei_plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const
{ return other * m_factor; }
/** Applies scaling to vector */
inline VectorType operator* (const VectorType& other) const
{ return coeffs().asDiagonal() * other; }
template<typename Derived,int Dim>
inline Matrix<Scalar,Dim,Dim> operator*(const RotationBase<Derived,Dim>& r) const
{ return r.toRotationMatrix() * m_factor; }
/** \returns the inverse scaling */
inline Scaling inverse() const
{ return Scaling(coeffs().cwise().inverse()); }
inline Scaling& operator=(const Scaling& other)
{
m_coeffs = other.m_coeffs;
return *this;
}
inline UniformScaling inverse() const
{ return UniformScaling(Scalar(1)/m_factor); }
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@@ -132,50 +95,65 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
inline typename ei_cast_return_type<Scaling,Scaling<NewScalarType,Dim> >::type cast() const
{ return typename ei_cast_return_type<Scaling,Scaling<NewScalarType,Dim> >::type(*this); }
inline UniformScaling<NewScalarType> cast() const
{ return UniformScaling<NewScalarType>(NewScalarType(m_factor)); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
inline explicit Scaling(const Scaling<OtherScalarType,Dim>& other)
{ m_coeffs = other.coeffs().template cast<Scalar>(); }
inline explicit UniformScaling(const UniformScaling<OtherScalarType>& other)
{ m_factor = Scalar(other.factor()); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
bool isApprox(const Scaling& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
bool isApprox(const UniformScaling& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
{ return ei_isApprox(m_factor, other.factor(), prec); }
};
/** Concatenates a linear transformation matrix and a uniform scaling */
// NOTE this operator is defiend in MatrixBase and not as a friend function
// of UniformScaling to fix an internal crash of Intel's ICC
template<typename Derived> const typename MatrixBase<Derived>::ScalarMultipleReturnType
MatrixBase<Derived>::operator*(const UniformScaling<Scalar>& s) const
{ return derived() * s.factor(); }
/** Constructs a uniform scaling from scale factor \a s */
static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
/** Constructs a uniform scaling from scale factor \a s */
static inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
/** Constructs a uniform scaling from scale factor \a s */
template<typename RealScalar>
static inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
{ return UniformScaling<std::complex<RealScalar> >(s); }
/** Constructs a 2D axis aligned scaling */
template<typename Scalar>
static inline DiagonalMatrix<Scalar,2> Scaling(Scalar sx, Scalar sy)
{ return DiagonalMatrix<Scalar,2>(sx, sy); }
/** Constructs a 3D axis aligned scaling */
template<typename Scalar>
static inline DiagonalMatrix<Scalar,3> Scaling(Scalar sx, Scalar sy, Scalar sz)
{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
/** Constructs an axis aligned scaling expression from vector expression \a coeffs
* This is an alias for coeffs.asDiagonal()
*/
template<typename Derived>
static inline const DiagonalMatrixWrapper<Derived> Scaling(const MatrixBase<Derived>& coeffs)
{ return coeffs.asDiagonal(); }
/** \addtogroup Geometry_Module */
//@{
typedef Scaling<float, 2> Scaling2f;
typedef Scaling<double,2> Scaling2d;
typedef Scaling<float, 3> Scaling3f;
typedef Scaling<double,3> Scaling3d;
/** \deprecated */
typedef DiagonalMatrix<float, 2> AlignedScaling2f;
/** \deprecated */
typedef DiagonalMatrix<double,2> AlignedScaling2d;
/** \deprecated */
typedef DiagonalMatrix<float, 3> AlignedScaling3f;
/** \deprecated */
typedef DiagonalMatrix<double,3> AlignedScaling3d;
//@}
template<typename Scalar, int Dim>
inline typename Scaling<Scalar,Dim>::TransformType
Scaling<Scalar,Dim>::operator* (const TranslationType& t) const
{
TransformType res;
res.matrix().setZero();
res.linear().diagonal() = coeffs();
res.translation() = m_coeffs.cwise() * t.vector();
res(Dim,Dim) = Scalar(1);
return res;
}
template<typename Scalar, int Dim>
inline typename Scaling<Scalar,Dim>::TransformType
Scaling<Scalar,Dim>::operator* (const TransformType& t) const
{
TransformType res = t;
res.prescale(m_coeffs);
return res;
}
#endif // EIGEN_SCALING_H

File diff suppressed because it is too large Load Diff

View File

@@ -52,10 +52,8 @@ public:
typedef Matrix<Scalar,Dim,1> VectorType;
/** corresponding linear transformation matrix type */
typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
/** corresponding scaling transformation type */
typedef Scaling<Scalar,Dim> ScalingType;
/** corresponding affine transformation type */
typedef Transform<Scalar,Dim> TransformType;
typedef Transform<Scalar,Dim> AffineTransformType;
protected:
@@ -80,7 +78,7 @@ public:
m_coeffs.y() = sy;
m_coeffs.z() = sz;
}
/** Constructs and initialize the scaling transformation from a vector of scaling coefficients */
/** Constructs and initialize the translation transformation from a vector of translation coefficients */
explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
const VectorType& vector() const { return m_coeffs; }
@@ -90,31 +88,40 @@ public:
inline Translation operator* (const Translation& other) const
{ return Translation(m_coeffs + other.m_coeffs); }
/** Concatenates a translation and a scaling */
inline TransformType operator* (const ScalingType& other) const;
/** Concatenates a translation and a uniform scaling */
inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
/** Concatenates a translation and a linear transformation */
inline TransformType operator* (const LinearMatrixType& linear) const;
template<typename OtherDerived>
inline AffineTransformType operator* (const MatrixBase<OtherDerived>& linear) const;
/** Concatenates a translation and a rotation */
template<typename Derived>
inline TransformType operator*(const RotationBase<Derived,Dim>& r) const
inline AffineTransformType operator*(const RotationBase<Derived,Dim>& r) const
{ return *this * r.toRotationMatrix(); }
/** Concatenates a linear transformation and a translation */
/** \returns the concatenation of a linear transformation \a l with the translation \a t */
// its a nightmare to define a templated friend function outside its declaration
friend inline TransformType operator* (const LinearMatrixType& linear, const Translation& t)
template<typename OtherDerived> friend
inline AffineTransformType operator*(const MatrixBase<OtherDerived>& linear, const Translation& t)
{
TransformType res;
AffineTransformType res;
res.matrix().setZero();
res.linear() = linear;
res.translation() = linear * t.m_coeffs;
res.linear() = linear.derived();
res.translation() = linear.derived() * t.m_coeffs;
res.matrix().row(Dim).setZero();
res(Dim,Dim) = Scalar(1);
return res;
}
/** Concatenates a translation and an affine transformation */
inline TransformType operator* (const TransformType& t) const;
template<int Mode>
inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode>& t) const
{
Transform<Scalar,Dim,Mode> res = t;
res.pretranslate(m_coeffs);
return res;
}
/** Applies translation to vector */
inline VectorType operator* (const VectorType& other) const
@@ -160,39 +167,30 @@ typedef Translation<float, 3> Translation3f;
typedef Translation<double,3> Translation3d;
//@}
template<typename Scalar, int Dim>
inline typename Translation<Scalar,Dim>::TransformType
Translation<Scalar,Dim>::operator* (const ScalingType& other) const
inline typename Translation<Scalar,Dim>::AffineTransformType
Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
{
TransformType res;
AffineTransformType res;
res.matrix().setZero();
res.linear().diagonal() = other.coeffs();
res.linear().diagonal().fill(other.factor());
res.translation() = m_coeffs;
res(Dim,Dim) = Scalar(1);
return res;
}
template<typename Scalar, int Dim>
inline typename Translation<Scalar,Dim>::TransformType
Translation<Scalar,Dim>::operator* (const LinearMatrixType& linear) const
template<typename OtherDerived>
inline typename Translation<Scalar,Dim>::AffineTransformType
Translation<Scalar,Dim>::operator* (const MatrixBase<OtherDerived>& linear) const
{
TransformType res;
AffineTransformType res;
res.matrix().setZero();
res.linear() = linear;
res.linear() = linear.derived();
res.translation() = m_coeffs;
res.matrix().row(Dim).setZero();
res(Dim,Dim) = Scalar(1);
return res;
}
template<typename Scalar, int Dim>
inline typename Translation<Scalar,Dim>::TransformType
Translation<Scalar,Dim>::operator* (const TransformType& t) const
{
TransformType res = t;
res.pretranslate(m_coeffs);
return res;
}
#endif // EIGEN_TRANSLATION_H

View File

@@ -0,0 +1,6 @@
FILE(GLOB Eigen_Geometry_arch_SRCS "*.h")
INSTALL(FILES
${Eigen_Geometry_arch_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry/arch COMPONENT Devel
)

View File

@@ -0,0 +1,63 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GEOMETRY_SSE_H
#define EIGEN_GEOMETRY_SSE_H
template<> inline Quaternion<float>
ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quaternion<float>& _b)
{
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000));
Quaternion<float> res;
__m128 a = _a.coeffs().packet<Aligned>(0);
__m128 b = _b.coeffs().packet<Aligned>(0);
__m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2),
ei_vec4f_swizzle1(b,2,0,1,2)),mask);
__m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1),
ei_vec4f_swizzle1(b,0,1,2,1)),mask);
ei_pstore(&res.x(),
_mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)),
_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0),
ei_vec4f_swizzle1(b,1,2,0,0))),
_mm_add_ps(flip1,flip2)));
return res;
}
template<typename VectorLhs,typename VectorRhs>
struct ei_cross3_impl<EiArch_SSE,VectorLhs,VectorRhs,float,true> {
inline static typename ei_plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
__m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
typename ei_plain_matrix_type<VectorLhs>::type res;
ei_pstore(&res.x(),_mm_sub_ps(mul1,mul2));
return res;
}
};
#endif // EIGEN_GEOMETRY_SSE_H

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_LU_SRCS "*.h")
INSTALL(FILES
${Eigen_LU_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel
)

View File

@@ -51,7 +51,7 @@ template<typename Derived,
{
static inline typename ei_traits<Derived>::Scalar run(const Derived& m)
{
return m.lu().determinant();
return m.partialLu().determinant();
}
};

View File

@@ -170,8 +170,7 @@ struct ei_compute_inverse
{
static inline void run(const MatrixType& matrix, MatrixType* result)
{
LU<MatrixType> lu(matrix);
lu.computeInverse(result);
matrix.partialLu().computeInverse(result);
}
};

View File

@@ -323,6 +323,7 @@ template<typename MatrixType> class LU
IntRowVectorType m_q;
int m_det_pq;
int m_rank;
RealScalar m_precision;
};
template<typename MatrixType>
@@ -332,9 +333,13 @@ LU<MatrixType>::LU(const MatrixType& matrix)
m_p(matrix.rows()),
m_q(matrix.cols())
{
const int size = matrix.diagonal().size();
const int size = matrix.diagonalSize();
const int rows = matrix.rows();
const int cols = matrix.cols();
// this formula comes from experimenting (see "LU precision tuning" thread on the list)
// and turns out to be identical to Higham's formula used already in LDLt.
m_precision = machine_epsilon<Scalar>() * size;
IntColVectorType rows_transpositions(matrix.rows());
IntRowVectorType cols_transpositions(matrix.cols());
@@ -355,7 +360,7 @@ LU<MatrixType>::LU(const MatrixType& matrix)
if(k==0) biggest = biggest_in_corner;
// if the corner is negligible, then we have less than full rank, and we can finish early
if(ei_isMuchSmallerThan(biggest_in_corner, biggest))
if(ei_isMuchSmallerThan(biggest_in_corner, biggest, m_precision))
{
m_rank = k;
for(int i = k; i < size; i++)
@@ -397,7 +402,7 @@ LU<MatrixType>::LU(const MatrixType& matrix)
template<typename MatrixType>
typename ei_traits<MatrixType>::Scalar LU<MatrixType>::determinant() const
{
return Scalar(m_det_pq) * m_lu.diagonal().redux(ei_scalar_product_op<Scalar>());
return Scalar(m_det_pq) * m_lu.diagonal().prod();
}
template<typename MatrixType>
@@ -506,7 +511,7 @@ bool LU<MatrixType>::solve(
RealScalar biggest_in_c = c.corner(TopLeft, m_rank, c.cols()).cwise().abs().maxCoeff();
for(int col = 0; col < c.cols(); ++col)
for(int row = m_rank; row < c.rows(); ++row)
if(!ei_isMuchSmallerThan(c.coeff(row,col), biggest_in_c))
if(!ei_isMuchSmallerThan(c.coeff(row,col), biggest_in_c, m_precision))
return false;
}
m_lu.corner(TopLeft, m_rank, m_rank)

262
Eigen/src/LU/PartialLU.h Normal file
View File

@@ -0,0 +1,262 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_PARTIALLU_H
#define EIGEN_PARTIALLU_H
/** \ingroup LU_Module
*
* \class PartialLU
*
* \brief LU decomposition of a matrix with partial pivoting, and related features
*
* \param MatrixType the type of the matrix of which we are computing the LU decomposition
*
* This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
* is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P
* is a permutation matrix.
*
* Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible matrices.
* So in this class, we plainly require that and take advantage of that to do some simplifications and optimizations.
* This class will assert that the matrix is square, but it won't (actually it can't) check that the matrix is invertible:
* it is your task to check that you only use this decomposition on invertible matrices.
*
* The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided by class LU.
*
* This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class,
* such as rank computation. If you need these features, use class LU.
*
* This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses. On the other hand,
* it is \b not suitable to determine whether a given matrix is invertible.
*
* The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
*
* \sa MatrixBase::partialLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class LU
*/
template<typename MatrixType> class PartialLU
{
public:
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;
typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> ColVectorType;
enum { MaxSmallDimAtCompileTime = EIGEN_ENUM_MIN(
MatrixType::MaxColsAtCompileTime,
MatrixType::MaxRowsAtCompileTime)
};
/** Constructor.
*
* \param matrix the matrix of which to compute the LU decomposition.
*
* \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
* If you need to deal with non-full rank, use class LU instead.
*/
PartialLU(const MatrixType& matrix);
/** \returns the LU decomposition matrix: the upper-triangular part is U, the
* unit-lower-triangular part is L (at least for square matrices; in the non-square
* case, special care is needed, see the documentation of class LU).
*
* \sa matrixL(), matrixU()
*/
inline const MatrixType& matrixLU() const
{
return m_lu;
}
/** \returns a vector of integers, whose size is the number of rows of the matrix being decomposed,
* representing the P permutation i.e. the permutation of the rows. For its precise meaning,
* see the examples given in the documentation of class LU.
*/
inline const IntColVectorType& permutationP() const
{
return m_p;
}
/** This method finds the solution x to the equation Ax=b, where A is the matrix of which
* *this is the LU decomposition. Since if this partial pivoting decomposition the matrix is assumed
* to have full rank, such a solution is assumed to exist and to be unique.
*
* \warning Again, if your matrix may not have full rank, use class LU instead. See LU::solve().
*
* \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
* the only requirement in order for the equation to make sense is that
* b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
* \param result a pointer to the vector or matrix in which to store the solution, if any exists.
* Resized if necessary, so that result->rows()==A.cols() and result->cols()==b.cols().
* If no solution exists, *result is left with undefined coefficients.
*
* Example: \include PartialLU_solve.cpp
* Output: \verbinclude PartialLU_solve.out
*
* \sa MatrixBase::solveTriangular(), inverse(), computeInverse()
*/
template<typename OtherDerived, typename ResultType>
void solve(const MatrixBase<OtherDerived>& b, ResultType *result) const;
/** \returns the determinant of the matrix of which
* *this is the LU decomposition. It has only linear complexity
* (that is, O(n) where n is the dimension of the square matrix)
* as the LU decomposition has already been computed.
*
* \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
* optimized paths.
*
* \warning a determinant can be very big or small, so for matrices
* of large enough dimension, there is a risk of overflow/underflow.
*
* \sa MatrixBase::determinant()
*/
typename ei_traits<MatrixType>::Scalar determinant() const;
/** Computes the inverse of the matrix of which *this is the LU decomposition.
*
* \param result a pointer to the matrix into which to store the inverse. Resized if needed.
*
* \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
* invertibility, use class LU instead.
*
* \sa MatrixBase::computeInverse(), inverse()
*/
inline void computeInverse(MatrixType *result) const
{
solve(MatrixType::Identity(m_lu.rows(), m_lu.cols()), result);
}
/** \returns the inverse of the matrix of which *this is the LU decomposition.
*
* \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
* invertibility, use class LU instead.
*
* \sa computeInverse(), MatrixBase::inverse()
*/
inline MatrixType inverse() const
{
MatrixType result;
computeInverse(&result);
return result;
}
protected:
const MatrixType& m_originalMatrix;
MatrixType m_lu;
IntColVectorType m_p;
int m_det_p;
};
template<typename MatrixType>
PartialLU<MatrixType>::PartialLU(const MatrixType& matrix)
: m_originalMatrix(matrix),
m_lu(matrix),
m_p(matrix.rows())
{
ei_assert(matrix.rows() == matrix.cols() && "PartialLU is only for square (and moreover invertible) matrices");
const int size = matrix.rows();
IntColVectorType rows_transpositions(size);
int number_of_transpositions = 0;
for(int k = 0; k < size; ++k)
{
int row_of_biggest_in_col;
m_lu.block(k,k,size-k,1).cwise().abs().maxCoeff(&row_of_biggest_in_col);
row_of_biggest_in_col += k;
rows_transpositions.coeffRef(k) = row_of_biggest_in_col;
if(k != row_of_biggest_in_col) {
m_lu.row(k).swap(m_lu.row(row_of_biggest_in_col));
++number_of_transpositions;
}
if(k<size-1) {
m_lu.col(k).end(size-k-1) /= m_lu.coeff(k,k);
for(int col = k + 1; col < size; ++col)
m_lu.col(col).end(size-k-1) -= m_lu.col(k).end(size-k-1) * m_lu.coeff(k,col);
}
}
for(int k = 0; k < size; ++k) m_p.coeffRef(k) = k;
for(int k = size-1; k >= 0; --k)
std::swap(m_p.coeffRef(k), m_p.coeffRef(rows_transpositions.coeff(k)));
m_det_p = (number_of_transpositions%2) ? -1 : 1;
}
template<typename MatrixType>
typename ei_traits<MatrixType>::Scalar PartialLU<MatrixType>::determinant() const
{
return Scalar(m_det_p) * m_lu.diagonal().prod();
}
template<typename MatrixType>
template<typename OtherDerived, typename ResultType>
void PartialLU<MatrixType>::solve(
const MatrixBase<OtherDerived>& b,
ResultType *result
) const
{
/* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
* So we proceed as follows:
* Step 1: compute c = Pb.
* Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
* Step 3: replace c by the solution x to Ux = c. Check if a solution really exists.
*/
const int size = m_lu.rows();
ei_assert(b.rows() == size);
result->resize(size, b.cols());
// Step 1
for(int i = 0; i < size; ++i) result->row(m_p.coeff(i)) = b.row(i);
// Step 2
m_lu.template marked<UnitLowerTriangular>()
.solveTriangularInPlace(*result);
// Step 3
m_lu.template marked<UpperTriangular>()
.solveTriangularInPlace(*result);
}
/** \lu_module
*
* \return the LU decomposition of \c *this.
*
* \sa class LU
*/
template<typename Derived>
inline const PartialLU<typename MatrixBase<Derived>::PlainMatrixType>
MatrixBase<Derived>::partialLu() const
{
return PartialLU<PlainMatrixType>(eval());
}
#endif // EIGEN_PARTIALLU_H

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_LeastSquares_SRCS "*.h")
INSTALL(FILES
${Eigen_LeastSquares_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LeastSquares
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LeastSquares COMPONENT Devel
)

View File

@@ -160,7 +160,6 @@ void fitHyperplane(int numPoints,
// compute the covariance matrix
CovMatrixType covMat = CovMatrixType::Zero(size, size);
VectorType remean = VectorType::Zero(size);
for(int i = 0; i < numPoints; ++i)
{
VectorType diff = (*(points[i]) - mean).conjugate();

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_QR_SRCS "*.h")
INSTALL(FILES
${Eigen_QR_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/QR
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/QR COMPONENT Devel
)

View File

@@ -58,10 +58,10 @@ template<typename _MatrixType> class HessenbergDecomposition
typedef Matrix<RealScalar, Size, 1> DiagonalType;
typedef Matrix<RealScalar, SizeMinusOne, 1> SubDiagonalType;
typedef typename NestByValue<DiagonalCoeffs<MatrixType> >::RealReturnType DiagonalReturnType;
typedef typename NestByValue<Diagonal<MatrixType,0> >::RealReturnType DiagonalReturnType;
typedef typename NestByValue<DiagonalCoeffs<
NestByValue<Block<MatrixType,SizeMinusOne,SizeMinusOne> > > >::RealReturnType SubDiagonalReturnType;
typedef typename NestByValue<Diagonal<
NestByValue<Block<MatrixType,SizeMinusOne,SizeMinusOne> >,0 > >::RealReturnType SubDiagonalReturnType;
/** This constructor initializes a HessenbergDecomposition object for
* further use with HessenbergDecomposition::compute()

View File

@@ -55,12 +55,64 @@ template<typename MatrixType> class QR
{
_compute(matrix);
}
/** \returns whether or not the matrix is of full rank */
bool isFullRank() const { return rank() == std::min(m_qr.rows(),m_qr.cols()); }
/** \deprecated use isInjective()
* \returns whether or not the matrix is of full rank
*
* \note Since the rank is computed only once, i.e. the first time it is needed, this
* method almost does not perform any further computation.
*/
EIGEN_DEPRECATED bool isFullRank() const { return rank() == m_qr.cols(); }
/** \returns the rank of the matrix of which *this is the QR decomposition.
*
* \note Since the rank is computed only once, i.e. the first time it is needed, this
* method almost does not perform any further computation.
*/
int rank() const;
/** \returns the dimension of the kernel of the matrix of which *this is the QR decomposition.
*
* \note Since the rank is computed only once, i.e. the first time it is needed, this
* method almost does not perform any further computation.
*/
inline int dimensionOfKernel() const
{
return m_qr.cols() - rank();
}
/** \returns true if the matrix of which *this is the QR decomposition represents an injective
* linear map, i.e. has trivial kernel; false otherwise.
*
* \note Since the rank is computed only once, i.e. the first time it is needed, this
* method almost does not perform any further computation.
*/
inline bool isInjective() const
{
return rank() == m_qr.cols();
}
/** \returns true if the matrix of which *this is the QR decomposition represents a surjective
* linear map; false otherwise.
*
* \note Since the rank is computed only once, i.e. the first time it is needed, this
* method almost does not perform any further computation.
*/
inline bool isSurjective() const
{
return rank() == m_qr.rows();
}
/** \returns true if the matrix of which *this is the QR decomposition is invertible.
*
* \note Since the rank is computed only once, i.e. the first time it is needed, this
* method almost does not perform any further computation.
*/
inline bool isInvertible() const
{
return isInjective() && isSurjective();
}
/** \returns a read-only expression of the matrix R of the actual the QR decomposition */
const Part<NestByValue<MatrixRBlockType>, UpperTriangular>
matrixR(void) const
@@ -69,6 +121,32 @@ template<typename MatrixType> class QR
return MatrixRBlockType(m_qr, 0, 0, cols, cols).nestByValue().template part<UpperTriangular>();
}
/** This method finds a solution x to the equation Ax=b, where A is the matrix of which
* *this is the QR decomposition, if any exists.
*
* \param b the right-hand-side of the equation to solve.
*
* \param result a pointer to the vector/matrix in which to store the solution, if any exists.
* Resized if necessary, so that result->rows()==A.cols() and result->cols()==b.cols().
* If no solution exists, *result is left with undefined coefficients.
*
* \returns true if any solution exists, false if no solution exists.
*
* \note If there exist more than one solution, this method will arbitrarily choose one.
* If you need a complete analysis of the space of solutions, take the one solution obtained
* by this method and add to it elements of the kernel, as determined by kernel().
*
* \note The case where b is a matrix is not yet implemented. Also, this
* code is space inefficient.
*
* Example: \include QR_solve.cpp
* Output: \verbinclude QR_solve.out
*
* \sa MatrixBase::solveTriangular(), kernel(), computeKernel(), inverse(), computeInverse()
*/
template<typename OtherDerived, typename ResultType>
bool solve(const MatrixBase<OtherDerived>& b, ResultType *result) const;
MatrixType matrixQ(void) const;
private:
@@ -88,12 +166,11 @@ int QR<MatrixType>::rank() const
{
if (!m_rankIsUptodate)
{
RealScalar maxCoeff = m_qr.diagonal().maxCoeff();
int n = std::min(m_qr.rows(),m_qr.cols());
m_rank = n;
for (int i=0; i<n; ++i)
if (ei_isMuchSmallerThan(m_qr.diagonal().coeff(i), maxCoeff))
--m_rank;
RealScalar maxCoeff = m_qr.diagonal().cwise().abs().maxCoeff();
int n = m_qr.cols();
m_rank = 0;
while(m_rank<n && !ei_isMuchSmallerThan(m_qr.diagonal().coeff(m_rank), maxCoeff))
++m_rank;
m_rankIsUptodate = true;
}
return m_rank;
@@ -108,6 +185,7 @@ void QR<MatrixType>::_compute(const MatrixType& matrix)
m_qr = matrix;
int rows = matrix.rows();
int cols = matrix.cols();
RealScalar eps2 = precision<RealScalar>()*precision<RealScalar>();
for (int k = 0; k < cols; ++k)
{
@@ -132,7 +210,8 @@ void QR<MatrixType>::_compute(const MatrixType& matrix)
m_hCoeffs.coeffRef(k) = 0;
}
}
else if ( (!ei_isMuchSmallerThan(beta=m_qr.col(k).end(remainingSize-1).squaredNorm(),static_cast<Scalar>(1))) || ei_imag(v0)==0 )
else if ((beta=m_qr.col(k).end(remainingSize-1).squaredNorm())>eps2)
// FIXME what about ei_imag(v0) ??
{
// form k-th Householder vector
beta = ei_sqrt(ei_abs2(v0)+beta);
@@ -160,9 +239,40 @@ void QR<MatrixType>::_compute(const MatrixType& matrix)
}
}
template<typename MatrixType>
template<typename OtherDerived, typename ResultType>
bool QR<MatrixType>::solve(
const MatrixBase<OtherDerived>& b,
ResultType *result
) const
{
const int rows = m_qr.rows();
ei_assert(b.rows() == rows);
result->resize(rows, b.cols());
// TODO(keir): There is almost certainly a faster way to multiply by
// Q^T without explicitly forming matrixQ(). Investigate.
*result = matrixQ().transpose()*b;
if(!isSurjective())
{
// is result is in the image of R ?
RealScalar biggest_in_res = result->corner(TopLeft, m_rank, result->cols()).cwise().abs().maxCoeff();
for(int col = 0; col < result->cols(); ++col)
for(int row = m_rank; row < result->rows(); ++row)
if(!ei_isMuchSmallerThan(result->coeff(row,col), biggest_in_res))
return false;
}
m_qr.corner(TopLeft, m_rank, m_rank)
.template marked<UpperTriangular>()
.solveTriangularInPlace(result->corner(TopLeft, m_rank, result->cols()));
return true;
}
/** \returns the matrix Q */
template<typename MatrixType>
MatrixType QR<MatrixType>::matrixQ(void) const
MatrixType QR<MatrixType>::matrixQ() const
{
// compute the product Q_0 Q_1 ... Q_n-1,
// where Q_k is the k-th Householder transformation I - h_k v_k v_k'

View File

@@ -52,8 +52,8 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
typedef Tridiagonalization<MatrixType> TridiagonalizationType;
SelfAdjointEigenSolver()
: m_eivec(Size, Size),
m_eivalues(Size)
: m_eivec(int(Size), int(Size)),
m_eivalues(int(Size))
{
ei_assert(Size!=Dynamic);
}
@@ -189,6 +189,14 @@ void SelfAdjointEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool
assert(matrix.cols() == matrix.rows());
int n = matrix.cols();
m_eivalues.resize(n,1);
if(n==1)
{
m_eivalues.coeffRef(0,0) = ei_real(matrix.coeff(0,0));
m_eivec.setOnes();
return;
}
m_eivec = matrix;
// FIXME, should tridiag be a local variable of this function or an attribute of SelfAdjointEigenSolver ?

View File

@@ -60,10 +60,10 @@ template<typename _MatrixType> class Tridiagonalization
typedef Matrix<RealScalar, Size, 1> DiagonalType;
typedef Matrix<RealScalar, SizeMinusOne, 1> SubDiagonalType;
typedef typename NestByValue<DiagonalCoeffs<MatrixType> >::RealReturnType DiagonalReturnType;
typedef typename NestByValue<Diagonal<MatrixType,0> >::RealReturnType DiagonalReturnType;
typedef typename NestByValue<DiagonalCoeffs<
NestByValue<Block<MatrixType,SizeMinusOne,SizeMinusOne> > > >::RealReturnType SubDiagonalReturnType;
typedef typename NestByValue<Diagonal<
NestByValue<Block<MatrixType,SizeMinusOne,SizeMinusOne> >,0 > >::RealReturnType SubDiagonalReturnType;
/** This constructor initializes a Tridiagonalization object for
* further use with Tridiagonalization::compute()
@@ -73,8 +73,7 @@ template<typename _MatrixType> class Tridiagonalization
{}
Tridiagonalization(const MatrixType& matrix)
: m_matrix(matrix),
m_hCoeffs(matrix.cols()-1)
: m_matrix(matrix), m_hCoeffs(matrix.cols()-1)
{
_compute(m_matrix, m_hCoeffs);
}
@@ -122,13 +121,12 @@ template<typename _MatrixType> class Tridiagonalization
static void decomposeInPlace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ = true);
private:
static void _compute(MatrixType& matA, CoeffVectorType& hCoeffs);
protected:
static void _decomposeInPlace3x3(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ = true);
protected:
MatrixType m_matrix;
CoeffVectorType m_hCoeffs;
};
@@ -192,7 +190,6 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType&
{
assert(matA.rows()==matA.cols());
int n = matA.rows();
// std::cerr << matA << "\n\n";
for (int i = 0; i<n-2; ++i)
{
// let's consider the vector v = i-th column starting at position i+1
@@ -221,99 +218,20 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType&
matA.col(i).coeffRef(i+1) = 1;
/* This is the initial algorithm which minimize operation counts and maximize
* the use of Eigen's expression. Unfortunately, the first matrix-vector product
* using Part<LowerTriangular|Selfadjoint> is very very slow */
#ifdef EIGEN_NEVER_DEFINED
// matrix - vector product
hCoeffs.end(n-i-1) = (matA.corner(BottomRight,n-i-1,n-i-1).template part<LowerTriangular|SelfAdjoint>()
* (h * matA.col(i).end(n-i-1))).lazy();
// simple axpy
hCoeffs.end(n-i-1) += (h * Scalar(-0.5) * matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))
* matA.col(i).end(n-i-1);
// rank-2 update
//Block<MatrixType,Dynamic,1> B(matA,i+1,i,n-i-1,1);
matA.corner(BottomRight,n-i-1,n-i-1).template part<LowerTriangular>() -=
(matA.col(i).end(n-i-1) * hCoeffs.end(n-i-1).adjoint()).lazy()
+ (hCoeffs.end(n-i-1) * matA.col(i).end(n-i-1).adjoint()).lazy();
#endif
/* end initial algorithm */
// hCoeffs.end(n-i-1) = (matA.corner(BottomRight,n-i-1,n-i-1).template part<LowerTriangular|SelfAdjoint>()
// * matA.col(i).end(n-i-1)).lazy();
// TODO map the above code to the function call below:
ei_product_selfadjoint_vector<Scalar,MatrixType::Flags&RowMajorBit,LowerTriangularBit>
(n-i-1,matA.corner(BottomRight,n-i-1,n-i-1).data(), matA.stride(), matA.col(i).end(n-i-1).data(), const_cast<Scalar*>(hCoeffs.end(n-i-1).data()));
/* If we still want to minimize operation count (i.e., perform operation on the lower part only)
* then we could provide the following algorithm for selfadjoint - vector product. However, a full
* matrix-vector product is still faster (at least for dynamic size, and not too small, did not check
* small matrices). The algo performs block matrix-vector and transposed matrix vector products. */
#ifdef EIGEN_NEVER_DEFINED
int n4 = (std::max(0,n-4)/4)*4;
hCoeffs.end(n-i-1).setZero();
for (int b=i+1; b<n4; b+=4)
{
// the ?x4 part:
hCoeffs.end(b-4) +=
Block<MatrixType,Dynamic,4>(matA,b+4,b,n-b-4,4) * matA.template block<4,1>(b,i);
// the respective transposed part:
Block<CoeffVectorType,4,1>(hCoeffs, b, 0, 4,1) +=
Block<MatrixType,Dynamic,4>(matA,b+4,b,n-b-4,4).adjoint() * Block<MatrixType,Dynamic,1>(matA,b+4,i,n-b-4,1);
// the 4x4 block diagonal:
Block<CoeffVectorType,4,1>(hCoeffs, b, 0, 4,1) +=
(Block<MatrixType,4,4>(matA,b,b,4,4).template part<LowerTriangular|SelfAdjoint>()
* (h * Block<MatrixType,4,1>(matA,b,i,4,1))).lazy();
}
#endif
// todo: handle the remaining part
/* end optimized selfadjoint - vector product */
hCoeffs.end(n-i-1) = hCoeffs.end(n-i-1)*h
+ (h*ei_conj(h)*Scalar(-0.5)*(matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))) *
matA.col(i).end(n-i-1);
/* Another interesting note: the above rank-2 update is much slower than the following hand written loop.
* After an analyze of the ASM, it seems GCC (4.2) generate poor code because of the Block. Moreover,
* if we remove the specialization of Block for Matrix then it is even worse, much worse ! */
#ifdef EIGEN_NEVER_DEFINED
// symmetric rank-2 update
for (int j1=i+1; j1<n; ++j1)
for (int i1=j1; i1<n; ++i1)
matA.coeffRef(i1,j1) -= matA.coeff(i1,i)*ei_conj(hCoeffs.coeff(j1-1))
+ hCoeffs.coeff(i1-1)*ei_conj(matA.coeff(j1,i));
#endif
/* end hand writen partial rank-2 update */
/* The current fastest implementation: the full matrix is used, no "optimization" to use/compute
* only half of the matrix. Custom vectorization of the inner col -= alpha X + beta Y such that access
* to col are always aligned. Once we support that in Assign, then the algorithm could be rewriten as
* a single compact expression. This code is therefore a good benchmark when will do that. */
// let's use the end of hCoeffs to store temporary values:
hCoeffs.end(n-i-1) = (matA.corner(BottomRight,n-i-1,n-i-1) * (h * matA.col(i).end(n-i-1))).lazy();
// FIXME in the above expr a temporary is created because of the scalar multiple by h
hCoeffs.end(n-i-1) += (h * Scalar(-0.5) * matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))
* matA.col(i).end(n-i-1);
const Scalar* EIGEN_RESTRICT pb = &matA.coeffRef(0,i);
const Scalar* EIGEN_RESTRICT pa = (&hCoeffs.coeffRef(0)) - 1;
for (int j1=i+1; j1<n; ++j1)
{
int starti = i+1;
int alignedEnd = starti;
if (PacketSize>1)
{
int alignedStart = (starti) + ei_alignmentOffset(&matA.coeffRef(starti,j1), n-starti);
alignedEnd = alignedStart + ((n-alignedStart)/PacketSize)*PacketSize;
for (int i1=starti; i1<alignedStart; ++i1)
matA.coeffRef(i1,j1) -= matA.coeff(i1,i)*ei_conj(hCoeffs.coeff(j1-1))
+ hCoeffs.coeff(i1-1)*ei_conj(matA.coeff(j1,i));
Packet tmp0 = ei_pset1(hCoeffs.coeff(j1-1));
Packet tmp1 = ei_pset1(matA.coeff(j1,i));
Scalar* pc = &matA.coeffRef(0,j1);
for (int i1=alignedStart ; i1<alignedEnd; i1+=PacketSize)
ei_pstore(pc+i1,ei_psub(ei_pload(pc+i1),
ei_padd(ei_pmul(tmp0, ei_ploadu(pb+i1)),
ei_pmul(tmp1, ei_ploadu(pa+i1)))));
}
for (int i1=alignedEnd; i1<n; ++i1)
matA.coeffRef(i1,j1) -= matA.coeff(i1,i)*ei_conj(hCoeffs.coeff(j1-1))
+ hCoeffs.coeff(i1-1)*ei_conj(matA.coeff(j1,i));
}
/* end optimized implementation */
matA.col(j1).end(n-j1) -= matA.col(i).end(n-j1) * ei_conj(hCoeffs.coeff(j1-1))
+ hCoeffs.end(n-j1) * ei_conj(matA.coeff(j1,i));
// note: at that point matA(i+1,i+1) is the (i+1)-th element of the final diagonal
// note: the sequence of the beta values leads to the subdiagonal entries

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_SVD_SRCS "*.h")
INSTALL(FILES
${Eigen_SVD_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SVD
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SVD COMPONENT Devel
)

View File

@@ -36,7 +36,7 @@ template<typename _Scalar> class AmbiVector
typedef _Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
AmbiVector(int size)
: m_buffer(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)
: m_buffer(0), m_zero(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)
{
resize(size);
}
@@ -44,7 +44,7 @@ template<typename _Scalar> class AmbiVector
void init(RealScalar estimatedDensity);
void init(int mode);
void nonZeros() const;
int nonZeros() const;
/** Specifies a sub-vector to work on */
void setBounds(int start, int end) { m_start = start; m_end = end; }
@@ -53,7 +53,7 @@ template<typename _Scalar> class AmbiVector
void restart();
Scalar& coeffRef(int i);
Scalar coeff(int i);
Scalar& coeff(int i);
class Iterator;
@@ -112,6 +112,7 @@ template<typename _Scalar> class AmbiVector
// used to store data in both mode
Scalar* m_buffer;
Scalar m_zero;
int m_size;
int m_start;
int m_end;
@@ -131,7 +132,7 @@ template<typename _Scalar> class AmbiVector
/** \returns the number of non zeros in the current sub vector */
template<typename Scalar>
void AmbiVector<Scalar>::nonZeros() const
int AmbiVector<Scalar>::nonZeros() const
{
if (m_mode==IsSparse)
return m_llSize;
@@ -254,7 +255,7 @@ Scalar& AmbiVector<Scalar>::coeffRef(int i)
}
template<typename Scalar>
Scalar AmbiVector<Scalar>::coeff(int i)
Scalar& AmbiVector<Scalar>::coeff(int i)
{
if (m_mode==IsDense)
return m_buffer[i];
@@ -264,7 +265,7 @@ Scalar AmbiVector<Scalar>::coeff(int i)
ei_assert(m_mode==IsSparse);
if ((m_llSize==0) || (i<llElements[m_llStart].index))
{
return Scalar(0);
return m_zero;
}
else
{
@@ -275,7 +276,7 @@ Scalar AmbiVector<Scalar>::coeff(int i)
if (llElements[elid].index==i)
return llElements[m_llCurrent].value;
else
return Scalar(0);
return m_zero;
}
}
}

View File

@@ -2,5 +2,5 @@ FILE(GLOB Eigen_Sparse_SRCS "*.h")
INSTALL(FILES
${Eigen_Sparse_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Sparse
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Sparse COMPONENT Devel
)

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -54,16 +54,17 @@ void ei_cholmod_configure_matrix(CholmodType& mat)
}
}
template<typename Scalar, int Flags>
cholmod_sparse SparseMatrixBase<Scalar,Flags>::asCholmodMatrix()
template<typename Derived>
cholmod_sparse SparseMatrixBase<Derived>::asCholmodMatrix()
{
typedef typename Derived::Scalar Scalar;
cholmod_sparse res;
res.nzmax = nonZeros();
res.nrow = rows();;
res.ncol = cols();
res.p = _outerIndexPtr();
res.i = _innerIndexPtr();
res.x = _valuePtr();
res.p = derived()._outerIndexPtr();
res.i = derived()._innerIndexPtr();
res.x = derived()._valuePtr();
res.xtype = CHOLMOD_REAL;
res.itype = CHOLMOD_INT;
res.sorted = 1;
@@ -73,11 +74,11 @@ cholmod_sparse SparseMatrixBase<Scalar,Flags>::asCholmodMatrix()
ei_cholmod_configure_matrix<Scalar>(res);
if (Flags & SelfAdjoint)
if (Derived::Flags & SelfAdjoint)
{
if (Flags & UpperTriangular)
if (Derived::Flags & UpperTriangular)
res.stype = 1;
else if (Flags & LowerTriangular)
else if (Derived::Flags & LowerTriangular)
res.stype = -1;
else
res.stype = 0;
@@ -108,14 +109,14 @@ cholmod_dense ei_cholmod_map_eigen_to_dense(MatrixBase<Derived>& mat)
}
template<typename Scalar, int Flags>
MappedSparseMatrix<Scalar,Flags>::MappedSparseMatrix(taucs_ccs_matrix& taucsMat)
MappedSparseMatrix<Scalar,Flags>::MappedSparseMatrix(cholmod_sparse& cm)
{
m_innerSize = cm.nrow;
m_outerSize = cm.ncol;
m_outerIndex = reinterpret_cast<int*>(cm.p);
m_innerIndices = reinterpret_cast<int*>(cm.i);
m_values = reinterpret_cast<Scalar*>(cm.x);
m_nnz = res.m_outerIndex[cm.ncol]);
m_nnz = m_outerIndex[cm.ncol];
}
template<typename MatrixType>
@@ -123,8 +124,8 @@ class SparseLLT<MatrixType,Cholmod> : public SparseLLT<MatrixType>
{
protected:
typedef SparseLLT<MatrixType> Base;
using typename Base::Scalar;
using Base::RealScalar;
typedef typename Base::Scalar Scalar;
typedef typename Base::RealScalar RealScalar;
using Base::MatrixLIsDirty;
using Base::SupernodalFactorIsDirty;
using Base::m_flags;
@@ -205,7 +206,7 @@ SparseLLT<MatrixType,Cholmod>::matrixL() const
ei_assert(!(m_status & SupernodalFactorIsDirty));
cholmod_sparse* cmRes = cholmod_factor_to_sparse(m_cholmodFactor, &m_cholmod);
const_cast<typename Base::CholMatrixType&>(m_matrix) = Base::CholMatrixType::Map(*cmRes);
const_cast<typename Base::CholMatrixType&>(m_matrix) = MappedSparseMatrix<Scalar>(*cmRes);
free(cmRes);
m_status = (m_status & ~MatrixLIsDirty);

View File

@@ -155,7 +155,7 @@ class CompressedStorage
/** Like at(), but the search is performed in the range [start,end) */
inline Scalar atInRange(size_t start, size_t end, int key, Scalar defaultValue = Scalar(0)) const
{
if (start==end)
if (start>=end)
return Scalar(0);
else if (end>start && key==m_indices[end-1])
return m_values[end-1];

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -83,6 +83,9 @@ class DynamicSparseMatrix
inline int innerSize() const { return m_innerSize; }
inline int outerSize() const { return m_data.size(); }
inline int innerNonZeros(int j) const { return m_data[j].size(); }
std::vector<CompressedStorage<Scalar> >& _data() { return m_data; }
const std::vector<CompressedStorage<Scalar> >& _data() const { return m_data; }
/** \returns the coefficient value at given position \a row, \a col
* This operation involes a log(rho*outer_size) binary search.
@@ -107,14 +110,14 @@ class DynamicSparseMatrix
class InnerIterator;
inline void setZero()
void setZero()
{
for (int j=0; j<outerSize(); ++j)
m_data[j].clear();
}
/** \returns the number of non zero coefficients */
inline int nonZeros() const
int nonZeros() const
{
int res = 0;
for (int j=0; j<outerSize(); ++j)
@@ -122,18 +125,39 @@ class DynamicSparseMatrix
return res;
}
/** Set the matrix to zero and reserve the memory for \a reserveSize nonzero coefficients. */
inline void startFill(int reserveSize = 1000)
/** \deprecated
* Set the matrix to zero and reserve the memory for \a reserveSize nonzero coefficients. */
EIGEN_DEPRECATED void startFill(int reserveSize = 1000)
{
int reserveSizePerVector = std::max(reserveSize/outerSize(),4);
for (int j=0; j<outerSize(); ++j)
setZero();
reserve(reserveSize);
}
void reserve(int reserveSize = 1000)
{
if (outerSize()>0)
{
m_data[j].clear();
m_data[j].reserve(reserveSizePerVector);
int reserveSizePerVector = std::max(reserveSize/outerSize(),4);
for (int j=0; j<outerSize(); ++j)
{
m_data[j].reserve(reserveSizePerVector);
}
}
}
inline void startVec(int /*outer*/) {}
inline Scalar& insertBack(int outer, int inner)
{
ei_assert(outer<int(m_data.size()) && inner<m_innerSize && "out of range");
ei_assert(((m_data[outer].size()==0) || (m_data[outer].index(m_data[outer].size()-1)<inner))
&& "wrong sorted insertion");
m_data[outer].append(0, inner);
return m_data[outer].value(m_data[outer].size()-1);
}
/** inserts a nonzero coefficient at given coordinates \a row, \a col and returns its reference assuming that:
/** \deprecated use insert()
* inserts a nonzero coefficient at given coordinates \a row, \a col and returns its reference assuming that:
* 1 - the coefficient does not exist yet
* 2 - this the coefficient with greater inner coordinate for the given outer coordinate.
* In other words, assuming \c *this is column-major, then there must not exists any nonzero coefficient of coordinates
@@ -141,21 +165,24 @@ class DynamicSparseMatrix
*
* \see fillrand(), coeffRef()
*/
inline Scalar& fill(int row, int col)
EIGEN_DEPRECATED Scalar& fill(int row, int col)
{
const int outer = IsRowMajor ? row : col;
const int inner = IsRowMajor ? col : row;
ei_assert(outer<int(m_data.size()) && inner<m_innerSize);
ei_assert((m_data[outer].size()==0) || (m_data[outer].index(m_data[outer].size()-1)<inner));
m_data[outer].append(0, inner);
return m_data[outer].value(m_data[outer].size()-1);
return insertBack(outer,inner);
}
/** Like fill() but with random inner coordinates.
/** \deprecated use insert()
* Like fill() but with random inner coordinates.
* Compared to the generic coeffRef(), the unique limitation is that we assume
* the coefficient does not exist yet.
*/
inline Scalar& fillrand(int row, int col)
EIGEN_DEPRECATED Scalar& fillrand(int row, int col)
{
return insert(row,col);
}
inline Scalar& insert(int row, int col)
{
const int outer = IsRowMajor ? row : col;
const int inner = IsRowMajor ? col : row;
@@ -175,8 +202,11 @@ class DynamicSparseMatrix
return m_data[outer].value(id+1);
}
/** Does nothing. Provided for compatibility with SparseMatrix. */
inline void endFill() {}
/** \deprecated use finalize()
* Does nothing. Provided for compatibility with SparseMatrix. */
EIGEN_DEPRECATED void endFill() {}
inline void finalize() {}
void prune(Scalar reference, RealScalar epsilon = precision<RealScalar>())
{
@@ -215,7 +245,7 @@ class DynamicSparseMatrix
}
inline DynamicSparseMatrix()
: m_innerSize(0)
: m_innerSize(0), m_data(0)
{
ei_assert(innerSize()==0 && outerSize()==0);
}

View File

@@ -65,10 +65,10 @@ class MappedSparseMatrix
//----------------------------------------
// direct access interface
inline const Scalar* _valuePtr() const { return &m_values; }
inline Scalar* _valuePtr() { return &m_values; }
inline const Scalar* _valuePtr() const { return m_values; }
inline Scalar* _valuePtr() { return m_values; }
inline const int* _innerIndexPtr() const { return &m_innerIndices; }
inline const int* _innerIndexPtr() const { return m_innerIndices; }
inline int* _innerIndexPtr() { return m_innerIndices; }
inline const int* _outerIndexPtr() const { return m_outerIndex; }
@@ -77,8 +77,8 @@ class MappedSparseMatrix
inline Scalar coeff(int row, int col) const
{
const int outer = RowMajor ? row : col;
const int inner = RowMajor ? col : row;
const int outer = IsRowMajor ? row : col;
const int inner = IsRowMajor ? col : row;
int start = m_outerIndex[outer];
int end = m_outerIndex[outer+1];
@@ -96,8 +96,8 @@ class MappedSparseMatrix
inline Scalar& coeffRef(int row, int col)
{
const int outer = RowMajor ? row : col;
const int inner = RowMajor ? col : row;
const int outer = IsRowMajor ? row : col;
const int inner = IsRowMajor ? col : row;
int start = m_outerIndex[outer];
int end = m_outerIndex[outer+1];
@@ -108,7 +108,7 @@ class MappedSparseMatrix
ei_assert((*r==inner) && (id<end) && "coeffRef cannot be called on a zero coefficient");
return m_values[id];
}
class InnerIterator;
/** \returns the number of non zero coefficients */
@@ -140,21 +140,25 @@ class MappedSparseMatrix<Scalar,_Flags>::InnerIterator
{
public:
InnerIterator(const MappedSparseMatrix& mat, int outer)
: m_matrix(mat), m_outer(outer), m_id(mat._outerIndexPtr[outer]), m_start(m_id), m_end(mat._outerIndexPtr[outer+1])
: m_matrix(mat),
m_outer(outer),
m_id(mat._outerIndexPtr()[outer]),
m_start(m_id),
m_end(mat._outerIndexPtr()[outer+1])
{}
template<unsigned int Added, unsigned int Removed>
InnerIterator(const Flagged<MappedSparseMatrix,Added,Removed>& mat, int outer)
: m_matrix(mat._expression()), m_id(m_matrix._outerIndexPtr[outer]),
m_start(m_id), m_end(m_matrix._outerIndexPtr[outer+1])
: m_matrix(mat._expression()), m_id(m_matrix._outerIndexPtr()[outer]),
m_start(m_id), m_end(m_matrix._outerIndexPtr()[outer+1])
{}
inline InnerIterator& operator++() { m_id++; return *this; }
inline Scalar value() const { return m_matrix.m_valuePtr[m_id]; }
inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix._valuePtr[m_id]); }
inline Scalar value() const { return m_matrix._valuePtr()[m_id]; }
inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix._valuePtr()[m_id]); }
inline int index() const { return m_matrix._innerIndexPtr(m_id); }
inline int index() const { return m_matrix._innerIndexPtr()[m_id]; }
inline int row() const { return IsRowMajor ? m_outer : index(); }
inline int col() const { return IsRowMajor ? index() : m_outer; }

View File

@@ -224,19 +224,28 @@ class RandomSetter
KeyType keyBitsMask = (1<<m_keyBitsOffset)-1;
if (!SwapStorage) // also means the map is sorted
{
mp_target->startFill(nonZeros());
mp_target->setZero();
mp_target->reserve(nonZeros());
int prevOuter = -1;
for (int k=0; k<m_outerPackets; ++k)
{
mp_target->startVec(k);
const int outerOffset = (1<<OuterPacketBits) * k;
typename HashMapType::iterator end = m_hashmaps[k].end();
for (typename HashMapType::iterator it = m_hashmaps[k].begin(); it!=end; ++it)
{
const int outer = (it->first >> m_keyBitsOffset) + outerOffset;
const int inner = it->first & keyBitsMask;
mp_target->fill(TargetRowMajor ? outer : inner, TargetRowMajor ? inner : outer) = it->second.value;
if (prevOuter!=outer)
{
for (int j=prevOuter+1;j<=outer;++j)
mp_target->startVec(j);
prevOuter = outer;
}
mp_target->insertBack(outer, inner) = it->second.value;
}
}
mp_target->endFill();
mp_target->finalize();
}
else
{

View File

@@ -26,70 +26,274 @@
#ifndef EIGEN_SPARSE_BLOCK_H
#define EIGEN_SPARSE_BLOCK_H
template<typename MatrixType>
struct ei_traits<SparseInnerVector<MatrixType> >
template<typename MatrixType, int Size>
struct ei_traits<SparseInnerVectorSet<MatrixType, Size> >
{
typedef typename ei_traits<MatrixType>::Scalar Scalar;
enum {
IsRowMajor = (int(MatrixType::Flags)&RowMajorBit)==RowMajorBit,
Flags = MatrixType::Flags,
RowsAtCompileTime = IsRowMajor ? 1 : MatrixType::RowsAtCompileTime,
ColsAtCompileTime = IsRowMajor ? MatrixType::ColsAtCompileTime : 1,
RowsAtCompileTime = IsRowMajor ? Size : MatrixType::RowsAtCompileTime,
ColsAtCompileTime = IsRowMajor ? MatrixType::ColsAtCompileTime : Size,
CoeffReadCost = MatrixType::CoeffReadCost
};
};
template<typename MatrixType>
class SparseInnerVector : ei_no_assignment_operator,
public SparseMatrixBase<SparseInnerVector<MatrixType> >
template<typename MatrixType, int Size>
class SparseInnerVectorSet : ei_no_assignment_operator,
public SparseMatrixBase<SparseInnerVectorSet<MatrixType, Size> >
{
enum {
IsRowMajor = ei_traits<SparseInnerVector>::IsRowMajor
};
public:
enum { IsRowMajor = ei_traits<SparseInnerVectorSet>::IsRowMajor };
public:
EIGEN_SPARSE_GENERIC_PUBLIC_INTERFACE(SparseInnerVector)
class InnerIterator;
inline SparseInnerVector(const MatrixType& matrix, int outer)
: m_matrix(matrix), m_outer(outer)
EIGEN_SPARSE_GENERIC_PUBLIC_INTERFACE(SparseInnerVectorSet)
class InnerIterator: public MatrixType::InnerIterator
{
public:
inline InnerIterator(const SparseInnerVectorSet& xpr, int outer)
: MatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer)
{}
};
inline SparseInnerVectorSet(const MatrixType& matrix, int outerStart, int outerSize)
: m_matrix(matrix), m_outerStart(outerStart), m_outerSize(outerSize)
{
ei_assert( (outerStart>=0) && ((outerStart+outerSize)<=matrix.outerSize()) );
}
inline SparseInnerVectorSet(const MatrixType& matrix, int outer)
: m_matrix(matrix), m_outerStart(outer), m_outerSize(Size)
{
ei_assert(Size!=Dynamic);
ei_assert( (outer>=0) && (outer<matrix.outerSize()) );
}
EIGEN_STRONG_INLINE int rows() const { return IsRowMajor ? 1 : m_matrix.rows(); }
EIGEN_STRONG_INLINE int cols() const { return IsRowMajor ? m_matrix.cols() : 1; }
// template<typename OtherDerived>
// inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
// {
// return *this;
// }
// template<typename Sparse>
// inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
// {
// return *this;
// }
EIGEN_STRONG_INLINE int rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
EIGEN_STRONG_INLINE int cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
protected:
const typename MatrixType::Nested m_matrix;
int m_outer;
int m_outerStart;
const ei_int_if_dynamic<Size> m_outerSize;
};
template<typename MatrixType>
class SparseInnerVector<MatrixType>::InnerIterator : public MatrixType::InnerIterator
/***************************************************************************
* specialisation for DynamicSparseMatrix
***************************************************************************/
template<typename _Scalar, int _Options, int Size>
class SparseInnerVectorSet<DynamicSparseMatrix<_Scalar, _Options>, Size>
: public SparseMatrixBase<SparseInnerVectorSet<DynamicSparseMatrix<_Scalar, _Options>, Size> >
{
public:
inline InnerIterator(const SparseInnerVector& xpr, int outer=0)
: MatrixType::InnerIterator(xpr.m_matrix, xpr.m_outer)
{
ei_assert(outer==0);
}
typedef DynamicSparseMatrix<_Scalar, _Options> MatrixType;
enum { IsRowMajor = ei_traits<SparseInnerVectorSet>::IsRowMajor };
public:
EIGEN_SPARSE_GENERIC_PUBLIC_INTERFACE(SparseInnerVectorSet)
class InnerIterator: public MatrixType::InnerIterator
{
public:
inline InnerIterator(const SparseInnerVectorSet& xpr, int outer)
: MatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer)
{}
};
inline SparseInnerVectorSet(const MatrixType& matrix, int outerStart, int outerSize)
: m_matrix(matrix), m_outerStart(outerStart), m_outerSize(outerSize)
{
ei_assert( (outerStart>=0) && ((outerStart+outerSize)<=matrix.outerSize()) );
}
inline SparseInnerVectorSet(const MatrixType& matrix, int outer)
: m_matrix(matrix), m_outerStart(outer), m_outerSize(Size)
{
ei_assert(Size!=Dynamic);
ei_assert( (outer>=0) && (outer<matrix.outerSize()) );
}
template<typename OtherDerived>
inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
{
if (IsRowMajor != ((OtherDerived::Flags&RowMajorBit)==RowMajorBit))
{
// need to transpose => perform a block evaluation followed by a big swap
DynamicSparseMatrix<Scalar,IsRowMajor?RowMajorBit:0> aux(other);
*this = aux.markAsRValue();
}
else
{
// evaluate/copy vector per vector
for (int j=0; j<m_outerSize.value(); ++j)
{
SparseVector<Scalar,IsRowMajor ? RowMajorBit : 0> aux(other.innerVector(j));
m_matrix.const_cast_derived()._data()[m_outerStart+j].swap(aux._data());
}
}
return *this;
}
inline SparseInnerVectorSet& operator=(const SparseInnerVectorSet& other)
{
return operator=<SparseInnerVectorSet>(other);
}
int nonZeros() const
{
int count = 0;
for (int j=0; j<m_outerSize; ++j)
count += m_matrix._data()[m_outerStart+j].size();
return count;
}
const Scalar& lastCoeff() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(SparseInnerVectorSet);
ei_assert(m_matrix.data()[m_outerStart].size()>0);
return m_matrix.data()[m_outerStart].vale(m_matrix.data()[m_outerStart].size()-1);
}
// template<typename Sparse>
// inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
// {
// return *this;
// }
EIGEN_STRONG_INLINE int rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
EIGEN_STRONG_INLINE int cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
protected:
const typename MatrixType::Nested m_matrix;
int m_outerStart;
const ei_int_if_dynamic<Size> m_outerSize;
};
/***************************************************************************
* specialisation for SparseMatrix
***************************************************************************/
template<typename _Scalar, int _Options, int Size>
class SparseInnerVectorSet<SparseMatrix<_Scalar, _Options>, Size>
: public SparseMatrixBase<SparseInnerVectorSet<SparseMatrix<_Scalar, _Options>, Size> >
{
typedef SparseMatrix<_Scalar, _Options> MatrixType;
enum { IsRowMajor = ei_traits<SparseInnerVectorSet>::IsRowMajor };
public:
EIGEN_SPARSE_GENERIC_PUBLIC_INTERFACE(SparseInnerVectorSet)
class InnerIterator: public MatrixType::InnerIterator
{
public:
inline InnerIterator(const SparseInnerVectorSet& xpr, int outer)
: MatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer)
{}
};
inline SparseInnerVectorSet(const MatrixType& matrix, int outerStart, int outerSize)
: m_matrix(matrix), m_outerStart(outerStart), m_outerSize(outerSize)
{
ei_assert( (outerStart>=0) && ((outerStart+outerSize)<=matrix.outerSize()) );
}
inline SparseInnerVectorSet(const MatrixType& matrix, int outer)
: m_matrix(matrix), m_outerStart(outer)
{
ei_assert(Size==1);
ei_assert( (outer>=0) && (outer<matrix.outerSize()) );
}
template<typename OtherDerived>
inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
{
if (IsRowMajor != ((OtherDerived::Flags&RowMajorBit)==RowMajorBit))
{
// need to transpose => perform a block evaluation followed by a big swap
DynamicSparseMatrix<Scalar,IsRowMajor?RowMajorBit:0> aux(other);
*this = aux.markAsRValue();
}
else
{
// evaluate/copy vector per vector
for (int j=0; j<m_outerSize.value(); ++j)
{
SparseVector<Scalar,IsRowMajor ? RowMajorBit : 0> aux(other.innerVector(j));
m_matrix.const_cast_derived()._data()[m_outerStart+j].swap(aux._data());
}
}
return *this;
}
inline SparseInnerVectorSet& operator=(const SparseInnerVectorSet& other)
{
return operator=<SparseInnerVectorSet>(other);
}
inline const Scalar* _valuePtr() const
{ return m_matrix._valuePtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
inline const int* _innerIndexPtr() const
{ return m_matrix._innerIndexPtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
inline const int* _outerIndexPtr() const
{ return m_matrix._outerIndexPtr() + m_outerStart; }
int nonZeros() const
{
return size_t(m_matrix._outerIndexPtr()[m_outerStart+m_outerSize.value()])
- size_t(m_matrix._outerIndexPtr()[m_outerStart]); }
const Scalar& lastCoeff() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(SparseInnerVectorSet);
ei_assert(nonZeros()>0);
return m_matrix._valuePtr()[m_matrix._outerIndexPtr()[m_outerStart+1]-1];
}
// template<typename Sparse>
// inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
// {
// return *this;
// }
EIGEN_STRONG_INLINE int rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
EIGEN_STRONG_INLINE int cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
protected:
const typename MatrixType::Nested m_matrix;
int m_outerStart;
const ei_int_if_dynamic<Size> m_outerSize;
};
//----------
/** \returns the i-th row of the matrix \c *this. For row-major matrix only. */
template<typename Derived>
SparseInnerVector<Derived> SparseMatrixBase<Derived>::row(int i)
SparseInnerVectorSet<Derived,1> SparseMatrixBase<Derived>::row(int i)
{
EIGEN_STATIC_ASSERT(IsRowMajor,THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES);
return innerVector(i);
}
/** \returns the i-th row of the matrix \c *this. For row-major matrix only.
/** \returns the i-th row of the matrix \c *this. For row-major matrix only.
* (read-only version) */
template<typename Derived>
const SparseInnerVector<Derived> SparseMatrixBase<Derived>::row(int i) const
const SparseInnerVectorSet<Derived,1> SparseMatrixBase<Derived>::row(int i) const
{
EIGEN_STATIC_ASSERT(IsRowMajor,THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES);
return innerVector(i);
@@ -97,18 +301,18 @@ const SparseInnerVector<Derived> SparseMatrixBase<Derived>::row(int i) const
/** \returns the i-th column of the matrix \c *this. For column-major matrix only. */
template<typename Derived>
SparseInnerVector<Derived> SparseMatrixBase<Derived>::col(int i)
SparseInnerVectorSet<Derived,1> SparseMatrixBase<Derived>::col(int i)
{
EIGEN_STATIC_ASSERT(!IsRowMajor,THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES);
EIGEN_STATIC_ASSERT(!IsRowMajor,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
return innerVector(i);
}
/** \returns the i-th column of the matrix \c *this. For column-major matrix only.
/** \returns the i-th column of the matrix \c *this. For column-major matrix only.
* (read-only version) */
template<typename Derived>
const SparseInnerVector<Derived> SparseMatrixBase<Derived>::col(int i) const
const SparseInnerVectorSet<Derived,1> SparseMatrixBase<Derived>::col(int i) const
{
EIGEN_STATIC_ASSERT(!IsRowMajor,THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES);
EIGEN_STATIC_ASSERT(!IsRowMajor,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
return innerVector(i);
}
@@ -116,15 +320,65 @@ const SparseInnerVector<Derived> SparseMatrixBase<Derived>::col(int i) const
* is col-major (resp. row-major).
*/
template<typename Derived>
SparseInnerVector<Derived> SparseMatrixBase<Derived>::innerVector(int outer)
{ return SparseInnerVector<Derived>(derived(), outer); }
SparseInnerVectorSet<Derived,1> SparseMatrixBase<Derived>::innerVector(int outer)
{ return SparseInnerVectorSet<Derived,1>(derived(), outer); }
/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
* is col-major (resp. row-major). Read-only.
*/
template<typename Derived>
const SparseInnerVector<Derived> SparseMatrixBase<Derived>::innerVector(int outer) const
{ return SparseInnerVector<Derived>(derived(), outer); }
const SparseInnerVectorSet<Derived,1> SparseMatrixBase<Derived>::innerVector(int outer) const
{ return SparseInnerVectorSet<Derived,1>(derived(), outer); }
//----------
/** \returns the i-th row of the matrix \c *this. For row-major matrix only. */
template<typename Derived>
SparseInnerVectorSet<Derived,Dynamic> SparseMatrixBase<Derived>::subrows(int start, int size)
{
EIGEN_STATIC_ASSERT(IsRowMajor,THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES);
return innerVectors(start, size);
}
/** \returns the i-th row of the matrix \c *this. For row-major matrix only.
* (read-only version) */
template<typename Derived>
const SparseInnerVectorSet<Derived,Dynamic> SparseMatrixBase<Derived>::subrows(int start, int size) const
{
EIGEN_STATIC_ASSERT(IsRowMajor,THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES);
return innerVectors(start, size);
}
/** \returns the i-th column of the matrix \c *this. For column-major matrix only. */
template<typename Derived>
SparseInnerVectorSet<Derived,Dynamic> SparseMatrixBase<Derived>::subcols(int start, int size)
{
EIGEN_STATIC_ASSERT(!IsRowMajor,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
return innerVectors(start, size);
}
/** \returns the i-th column of the matrix \c *this. For column-major matrix only.
* (read-only version) */
template<typename Derived>
const SparseInnerVectorSet<Derived,Dynamic> SparseMatrixBase<Derived>::subcols(int start, int size) const
{
EIGEN_STATIC_ASSERT(!IsRowMajor,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
return innerVectors(start, size);
}
/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
* is col-major (resp. row-major).
*/
template<typename Derived>
SparseInnerVectorSet<Derived,Dynamic> SparseMatrixBase<Derived>::innerVectors(int outerStart, int outerSize)
{ return SparseInnerVectorSet<Derived,Dynamic>(derived(), outerStart, outerSize); }
/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this
* is col-major (resp. row-major). Read-only.
*/
template<typename Derived>
const SparseInnerVectorSet<Derived,Dynamic> SparseMatrixBase<Derived>::innerVectors(int outerStart, int outerSize) const
{ return SparseInnerVectorSet<Derived,Dynamic>(derived(), outerStart, outerSize); }
# if 0
template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess>

View File

@@ -86,6 +86,8 @@ class SparseCwiseBinaryOp : ei_no_assignment_operator,
EIGEN_STRONG_INLINE SparseCwiseBinaryOp(const Lhs& lhs, const Rhs& rhs, const BinaryOp& func = BinaryOp())
: m_lhs(lhs), m_rhs(rhs), m_functor(func)
{
EIGEN_STATIC_ASSERT((_LhsNested::Flags&RowMajorBit)==(_RhsNested::Flags&RowMajorBit),
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER)
EIGEN_STATIC_ASSERT((ei_functor_allows_mixing_real_and_complex<BinaryOp>::ret
? int(ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret)
: int(ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret)),
@@ -130,11 +132,10 @@ class SparseCwiseBinaryOp<BinaryOp,Lhs,Rhs>::InnerIterator
* Implementation of inner-iterators
***************************************************************************/
// template<typename T> struct ei_is_scalar_product { enum { ret = false }; };
// template<typename T> struct ei_is_scalar_product<ei_scalar_product_op<T> > { enum { ret = true }; };
// helper class
// template<typename T> struct ei_func_is_conjunction { enum { ret = false }; };
// template<typename T> struct ei_func_is_conjunction<ei_scalar_product_op<T> > { enum { ret = true }; };
// TODO generalize the ei_scalar_product_op specialization to all conjunctions if any !
// sparse - sparse (generic)
template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived>
@@ -259,12 +260,13 @@ class ei_sparse_cwise_binary_op_inner_iterator_selector<ei_scalar_product_op<T>,
typedef SparseCwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr;
typedef typename CwiseBinaryXpr::Scalar Scalar;
typedef typename ei_traits<CwiseBinaryXpr>::_LhsNested _LhsNested;
typedef typename ei_traits<CwiseBinaryXpr>::RhsNested RhsNested;
typedef typename _LhsNested::InnerIterator LhsIterator;
enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
public:
EIGEN_STRONG_INLINE ei_sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, int outer)
: m_xpr(xpr), m_lhsIter(xpr.lhs(),outer), m_functor(xpr.functor()), m_outer(outer)
: m_rhs(xpr.rhs()), m_lhsIter(xpr.lhs(),outer), m_functor(xpr.functor()), m_outer(outer)
{}
EIGEN_STRONG_INLINE Derived& operator++()
@@ -275,7 +277,7 @@ class ei_sparse_cwise_binary_op_inner_iterator_selector<ei_scalar_product_op<T>,
EIGEN_STRONG_INLINE Scalar value() const
{ return m_functor(m_lhsIter.value(),
m_xpr.rhs().coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); }
m_rhs.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); }
EIGEN_STRONG_INLINE int index() const { return m_lhsIter.index(); }
EIGEN_STRONG_INLINE int row() const { return m_lhsIter.row(); }
@@ -284,9 +286,9 @@ class ei_sparse_cwise_binary_op_inner_iterator_selector<ei_scalar_product_op<T>,
EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
protected:
const CwiseBinaryXpr& m_xpr;
const RhsNested m_rhs;
LhsIterator m_lhsIter;
const BinaryFunc& m_functor;
const BinaryFunc m_functor;
const int m_outer;
};
@@ -329,6 +331,10 @@ class ei_sparse_cwise_binary_op_inner_iterator_selector<ei_scalar_product_op<T>,
};
/***************************************************************************
* Implementation of SparseMatrixBase and SparseCwise functions/operators
***************************************************************************/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE const SparseCwiseBinaryOp<ei_scalar_difference_op<typename ei_traits<Derived>::Scalar>,

View File

@@ -89,7 +89,7 @@ class SparseCwiseUnaryOp<UnaryOp,MatrixType>::InnerIterator
protected:
MatrixTypeIterator m_iter;
const UnaryOp& m_functor;
const UnaryOp m_functor;
};
template<typename Derived>

View File

@@ -0,0 +1,157 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_SPARSE_DIAGONAL_PRODUCT_H
#define EIGEN_SPARSE_DIAGONAL_PRODUCT_H
// the product a diagonal matrix with a sparse matrix can be easily
// implemented using expression template. We have two very different cases:
// 1 - diag * row-major sparse
// => each inner vector <=> scalar * sparse vector product
// => so we can reuse CwiseUnaryOp::InnerIterator
// 2 - diag * col-major sparse
// => each inner vector <=> densevector * sparse vector cwise product
// => again, we can reuse specialization of CwiseBinaryOp::InnerIterator
// for that particular case
// The two other cases are symmetric.
template<typename Lhs, typename Rhs>
struct ei_traits<SparseDiagonalProduct<Lhs, Rhs> > : ei_traits<SparseProduct<Lhs, Rhs, DiagonalProduct> >
{
typedef typename ei_cleantype<Lhs>::type _Lhs;
typedef typename ei_cleantype<Rhs>::type _Rhs;
enum {
SparseFlags = ei_is_diagonal<_Lhs>::ret ? int(_Rhs::Flags) : int(_Lhs::Flags),
Flags = SparseBit | (SparseFlags&RowMajorBit)
};
};
enum {SDP_IsDiagonal, SDP_IsSparseRowMajor, SDP_IsSparseColMajor};
template<typename Lhs, typename Rhs, typename SparseDiagonalProductType, int RhsMode, int LhsMode>
class ei_sparse_diagonal_product_inner_iterator_selector;
template<typename LhsNested, typename RhsNested>
class SparseDiagonalProduct : public SparseMatrixBase<SparseDiagonalProduct<LhsNested,RhsNested> >, ei_no_assignment_operator
{
typedef typename ei_traits<SparseDiagonalProduct>::_LhsNested _LhsNested;
typedef typename ei_traits<SparseDiagonalProduct>::_RhsNested _RhsNested;
enum {
LhsMode = ei_is_diagonal<_LhsNested>::ret ? SDP_IsDiagonal
: (_LhsNested::Flags&RowMajorBit) ? SDP_IsSparseRowMajor : SDP_IsSparseColMajor,
RhsMode = ei_is_diagonal<_RhsNested>::ret ? SDP_IsDiagonal
: (_RhsNested::Flags&RowMajorBit) ? SDP_IsSparseRowMajor : SDP_IsSparseColMajor
};
public:
EIGEN_SPARSE_GENERIC_PUBLIC_INTERFACE(SparseDiagonalProduct)
typedef ei_sparse_diagonal_product_inner_iterator_selector
<_LhsNested,_RhsNested,SparseDiagonalProduct,LhsMode,RhsMode> InnerIterator;
template<typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE SparseDiagonalProduct(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
ei_assert(lhs.cols() == rhs.rows() && "invalid sparse matrix * diagonal matrix product");
}
EIGEN_STRONG_INLINE int rows() const { return m_lhs.rows(); }
EIGEN_STRONG_INLINE int cols() const { return m_rhs.cols(); }
EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }
protected:
LhsNested m_lhs;
RhsNested m_rhs;
};
template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
class ei_sparse_diagonal_product_inner_iterator_selector
<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseRowMajor>
: public SparseCwiseUnaryOp<ei_scalar_multiple_op<typename Lhs::Scalar>,Rhs>::InnerIterator
{
typedef typename SparseCwiseUnaryOp<ei_scalar_multiple_op<typename Lhs::Scalar>,Rhs>::InnerIterator Base;
public:
inline ei_sparse_diagonal_product_inner_iterator_selector(
const SparseDiagonalProductType& expr, int outer)
: Base(expr.rhs()*(expr.lhs().diagonal().coeff(outer)), outer)
{}
};
template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
class ei_sparse_diagonal_product_inner_iterator_selector
<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseColMajor>
: public SparseCwiseBinaryOp<
ei_scalar_product_op<typename Lhs::Scalar>,
SparseInnerVectorSet<Rhs,1>,
typename Lhs::_CoeffsVectorType>::InnerIterator
{
typedef typename SparseCwiseBinaryOp<
ei_scalar_product_op<typename Lhs::Scalar>,
SparseInnerVectorSet<Rhs,1>,
typename Lhs::_CoeffsVectorType>::InnerIterator Base;
public:
inline ei_sparse_diagonal_product_inner_iterator_selector(
const SparseDiagonalProductType& expr, int outer)
: Base(expr.rhs().innerVector(outer) .cwise()* expr.lhs().diagonal(), 0)
{}
};
template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
class ei_sparse_diagonal_product_inner_iterator_selector
<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseColMajor,SDP_IsDiagonal>
: public SparseCwiseUnaryOp<ei_scalar_multiple_op<typename Rhs::Scalar>,Lhs>::InnerIterator
{
typedef typename SparseCwiseUnaryOp<ei_scalar_multiple_op<typename Rhs::Scalar>,Lhs>::InnerIterator Base;
public:
inline ei_sparse_diagonal_product_inner_iterator_selector(
const SparseDiagonalProductType& expr, int outer)
: Base(expr.lhs()*expr.rhs().diagonal().coeff(outer), outer)
{}
};
template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
class ei_sparse_diagonal_product_inner_iterator_selector
<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseRowMajor,SDP_IsDiagonal>
: public SparseCwiseBinaryOp<
ei_scalar_product_op<typename Rhs::Scalar>,
SparseInnerVectorSet<Lhs,1>,
NestByValue<Transpose<typename Rhs::_CoeffsVectorType> > >::InnerIterator
{
typedef typename SparseCwiseBinaryOp<
ei_scalar_product_op<typename Rhs::Scalar>,
SparseInnerVectorSet<Lhs,1>,
NestByValue<Transpose<typename Rhs::_CoeffsVectorType> > >::InnerIterator Base;
public:
inline ei_sparse_diagonal_product_inner_iterator_selector(
const SparseDiagonalProductType& expr, int outer)
: Base(expr.lhs().innerVector(outer) .cwise()* expr.rhs().diagonal().transpose().nestByValue(), 0)
{}
};
#endif // EIGEN_SPARSE_DIAGONAL_PRODUCT_H

View File

@@ -79,7 +79,7 @@ class SparseLDLT
protected:
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef SparseMatrix<Scalar,LowerTriangular|UnitDiagBit> CholMatrixType;
typedef SparseMatrix<Scalar> CholMatrixType;
typedef Matrix<Scalar,MatrixType::ColsAtCompileTime,1> VectorType;
enum {
@@ -137,7 +137,7 @@ class SparseLDLT
* overloads the MemoryEfficient flags)
*
* \sa flags() */
void settagss(int f) { m_flags = f; }
void settags(int f) { m_flags = f; }
/** \returns the current flags */
int flags() const { return m_flags; }
@@ -333,12 +333,12 @@ bool SparseLDLT<MatrixType, Backend>::solveInPlace(MatrixBase<Derived> &b) const
return false;
if (m_matrix.nonZeros()>0) // otherwise L==I
m_matrix.solveTriangularInPlace(b);
m_matrix.template triangular<LowerTriangular|UnitDiagBit>().solveInPlace(b);
b = b.cwise() / m_diag;
// FIXME should be .adjoint() but it fails to compile...
if (m_matrix.nonZeros()>0) // otherwise L==I
m_matrix.transpose().solveTriangularInPlace(b);
m_matrix.transpose().template triangular<UpperTriangular|UnitDiagBit>().solveInPlace(b);
return true;
}

View File

@@ -135,7 +135,8 @@ void SparseLLT<MatrixType,Backend>::compute(const MatrixType& a)
RealScalar density = a.nonZeros()/RealScalar(size*size);
// TODO estimate the number of non zeros
m_matrix.startFill(a.nonZeros()*2);
m_matrix.setZero();
m_matrix.reserve(a.nonZeros()*2);
for (int j = 0; j < size; ++j)
{
Scalar x = ei_real(a.coeff(j,j));
@@ -147,6 +148,8 @@ void SparseLLT<MatrixType,Backend>::compute(const MatrixType& a)
// init with current matrix a
{
typename MatrixType::InnerIterator it(a,j);
ei_assert(it.index()==j &&
"matrix must has non zero diagonal entries and only the lower triangular part must be stored");
++it; // skip diagonal element
for (; it; ++it)
tempVector.coeffRef(it.index()) = it.value();
@@ -171,14 +174,15 @@ void SparseLLT<MatrixType,Backend>::compute(const MatrixType& a)
// copy the temporary vector to the respective m_matrix.col()
// while scaling the result by 1/real(x)
RealScalar rx = ei_sqrt(ei_real(x));
m_matrix.fill(j,j) = rx;
m_matrix.insert(j,j) = rx; // FIXME use insertBack
Scalar y = Scalar(1)/rx;
for (typename AmbiVector<Scalar>::Iterator it(tempVector, m_precision*rx); it; ++it)
{
m_matrix.fill(it.index(), j) = it.value() * y;
// FIXME use insertBack
m_matrix.insert(it.index(), j) = it.value() * y;
}
}
m_matrix.endFill();
m_matrix.finalize();
}
/** Computes b = L^-T L^-1 b */
@@ -189,15 +193,15 @@ bool SparseLLT<MatrixType, Backend>::solveInPlace(MatrixBase<Derived> &b) const
const int size = m_matrix.rows();
ei_assert(size==b.rows());
m_matrix.solveTriangularInPlace(b);
m_matrix.template triangular<LowerTriangular>().solveInPlace(b);
// FIXME should be simply .adjoint() but it fails to compile...
if (NumTraits<Scalar>::IsComplex)
{
CholMatrixType aux = m_matrix.conjugate();
aux.transpose().solveTriangularInPlace(b);
aux.transpose().template triangular<UpperTriangular>().solveInPlace(b);
}
else
m_matrix.transpose().solveTriangularInPlace(b);
m_matrix.transpose().template triangular<UpperTriangular>().solveInPlace(b);
return true;
}

View File

@@ -25,6 +25,12 @@
#ifndef EIGEN_SPARSELU_H
#define EIGEN_SPARSELU_H
enum {
SvNoTrans = 0,
SvTranspose = 1,
SvAdjoint = 2
};
/** \ingroup Sparse_Module
*
* \class SparseLU
@@ -115,7 +121,8 @@ class SparseLU
//inline const MatrixType& matrixU() const { return m_matrixU; }
template<typename BDerived, typename XDerived>
bool solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x) const;
bool solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x,
const int transposed = SvNoTrans) const;
/** \returns true if the factorization succeeded */
inline bool succeeded(void) const { return m_succeeded; }
@@ -136,10 +143,17 @@ void SparseLU<MatrixType,Backend>::compute(const MatrixType& a)
ei_assert(false && "not implemented yet");
}
/** Computes *x = U^-1 L^-1 b */
/** Computes *x = U^-1 L^-1 b
*
* If \a transpose is set to SvTranspose or SvAdjoint, the solution
* of the transposed/adjoint system is computed instead.
*
* Not all backends implement the solution of the transposed or
* adjoint system.
*/
template<typename MatrixType, int Backend>
template<typename BDerived, typename XDerived>
bool SparseLU<MatrixType,Backend>::solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x) const
bool SparseLU<MatrixType,Backend>::solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x, const int transposed) const
{
ei_assert(false && "not implemented yet");
return false;

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -62,7 +62,7 @@ class SparseMatrix
// FIXME: why are these operator already alvailable ???
// EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(SparseMatrix, *=)
// EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(SparseMatrix, /=)
typedef MappedSparseMatrix<Scalar,Flags> Map;
protected:
@@ -79,7 +79,7 @@ class SparseMatrix
inline int rows() const { return IsRowMajor ? m_outerSize : m_innerSize; }
inline int cols() const { return IsRowMajor ? m_innerSize : m_outerSize; }
inline int innerSize() const { return m_innerSize; }
inline int outerSize() const { return m_outerSize; }
inline int innerNonZeros(int j) const { return m_outerIndex[j+1]-m_outerIndex[j]; }
@@ -118,34 +118,36 @@ class SparseMatrix
class InnerIterator;
/** Removes all non zeros */
inline void setZero()
{
m_data.clear();
//if (m_outerSize)
memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(int));
// for (int i=0; i<m_outerSize; ++i)
// m_outerIndex[i] = 0;
// if (m_outerSize)
// m_outerIndex[i] = 0;
}
/** \returns the number of non zero coefficients */
inline int nonZeros() const { return m_data.size(); }
/** Initializes the filling process of \c *this.
/** \deprecated use setZero() and reserve()
* Initializes the filling process of \c *this.
* \param reserveSize approximate number of nonzeros
* Note that the matrix \c *this is zero-ed.
*/
inline void startFill(int reserveSize = 1000)
EIGEN_DEPRECATED void startFill(int reserveSize = 1000)
{
// std::cerr << this << " startFill\n";
setZero();
m_data.reserve(reserveSize);
}
/**
/** Preallocates \a reserveSize non zeros */
inline void reserve(int reserveSize)
{
m_data.reserve(reserveSize);
}
/** \deprecated use insert()
*/
inline Scalar& fill(int row, int col)
EIGEN_DEPRECATED Scalar& fill(int row, int col)
{
const int outer = IsRowMajor ? row : col;
const int inner = IsRowMajor ? col : row;
@@ -161,6 +163,11 @@ class SparseMatrix
}
m_outerIndex[outer+1] = m_outerIndex[outer];
}
else
{
ei_assert(m_data.index(m_data.size()-1)<inner && "wrong sorted insertion");
}
// std::cerr << size_t(m_outerIndex[outer+1]) << " == " << m_data.size() << "\n";
assert(size_t(m_outerIndex[outer+1]) == m_data.size());
int id = m_outerIndex[outer+1];
++m_outerIndex[outer+1];
@@ -169,44 +176,135 @@ class SparseMatrix
return m_data.value(id);
}
/** Like fill() but with random inner coordinates.
//--- low level purely coherent filling ---
inline Scalar& insertBack(int outer, int inner)
{
ei_assert(size_t(m_outerIndex[outer+1]) == m_data.size() && "wrong sorted insertion");
ei_assert( (m_outerIndex[outer+1]-m_outerIndex[outer]==0 || m_data.index(m_data.size()-1)<inner) && "wrong sorted insertion");
int id = m_outerIndex[outer+1];
++m_outerIndex[outer+1];
m_data.append(0, inner);
return m_data.value(id);
}
inline void startVec(int outer)
{
ei_assert(m_outerIndex[outer]==int(m_data.size()) && "you must call startVec on each inner vec");
ei_assert(m_outerIndex[outer+1]==0 && "you must call startVec on each inner vec");
m_outerIndex[outer+1] = m_outerIndex[outer];
}
//---
/** \deprecated use insert()
* Like fill() but with random inner coordinates.
*/
inline Scalar& fillrand(int row, int col)
EIGEN_DEPRECATED Scalar& fillrand(int row, int col)
{
return insert(row,col);
}
/** \returns a reference to a novel non zero coefficient with coordinates \a row x \a col.
* The non zero coefficient must \b not already exist.
*
* \warning This function can be extremely slow if the non zero coefficients
* are not inserted in a coherent order.
*
* After an insertion session, you should call the finalize() function.
*/
EIGEN_DONT_INLINE Scalar& insert(int row, int col)
{
const int outer = IsRowMajor ? row : col;
const int inner = IsRowMajor ? col : row;
int previousOuter = outer;
if (m_outerIndex[outer+1]==0)
{
// we start a new inner vector
// nothing special to do here
int i = outer;
while (i>=0 && m_outerIndex[i]==0)
while (previousOuter>=0 && m_outerIndex[previousOuter]==0)
{
m_outerIndex[i] = m_data.size();
--i;
m_outerIndex[previousOuter] = m_data.size();
--previousOuter;
}
m_outerIndex[outer+1] = m_outerIndex[outer];
}
assert(size_t(m_outerIndex[outer+1]) == m_data.size() && "invalid outer index");
// here we have to handle the tricky case where the outerIndex array
// starts with: [ 0 0 0 0 0 1 ...] and we are inserting in, e.g.,
// the 2nd inner vector...
bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
&& (size_t(m_outerIndex[outer+1]) == m_data.size());
size_t startId = m_outerIndex[outer];
// FIXME let's make sure sizeof(long int) == sizeof(size_t)
size_t id = m_outerIndex[outer+1];
++m_outerIndex[outer+1];
float reallocRatio = 1;
if (m_data.allocatedSize()<id+1)
if (m_data.allocatedSize()<=m_data.size())
{
// we need to reallocate the data, to reduce multiple reallocations
// we use a smart resize algorithm based on the current filling ratio
// we use float to avoid overflows
float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer);
reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
// let's bounds the realloc ratio to
// 1) reduce multiple minor realloc when the matrix is almost filled
// 2) avoid to allocate too much memory when the matrix is almost empty
reallocRatio = std::min(std::max(reallocRatio,1.5f),8.f);
// if there is no preallocated memory, let's reserve a minimum of 32 elements
if (m_data.size()==0)
{
m_data.reserve(32);
}
else
{
// we need to reallocate the data, to reduce multiple reallocations
// we use a smart resize algorithm based on the current filling ratio
// in addition, we use float to avoid integers overflows
float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
// furthermore we bound the realloc ratio to:
// 1) reduce multiple minor realloc when the matrix is almost filled
// 2) avoid to allocate too much memory when the matrix is almost empty
reallocRatio = std::min(std::max(reallocRatio,1.5f),8.f);
}
}
m_data.resize(m_data.size()+1,reallocRatio);
if (!isLastVec)
{
if (previousOuter==-1)
{
// oops wrong guess.
// let's correct the outer offsets
for (int k=0; k<=(outer+1); ++k)
m_outerIndex[k] = 0;
int k=outer+1;
while(m_outerIndex[k]==0)
m_outerIndex[k++] = 1;
while (k<=m_outerSize && m_outerIndex[k]!=0)
m_outerIndex[k++]++;
id = 0;
--k;
k = m_outerIndex[k]-1;
while (k>0)
{
m_data.index(k) = m_data.index(k-1);
m_data.value(k) = m_data.value(k-1);
k--;
}
}
else
{
// we are not inserting into the last inner vec
// update outer indices:
int j = outer+2;
while (j<=m_outerSize && m_outerIndex[j]!=0)
m_outerIndex[j++]++;
--j;
// shift data of last vecs:
int k = m_outerIndex[j]-1;
while (k>=int(id))
{
m_data.index(k) = m_data.index(k-1);
m_data.value(k) = m_data.value(k-1);
k--;
}
}
}
m_data.resize(id+1,reallocRatio);
while ( (id > startId) && (m_data.index(id-1) > inner) )
{
@@ -214,12 +312,16 @@ class SparseMatrix
m_data.value(id) = m_data.value(id-1);
--id;
}
m_data.index(id) = inner;
return (m_data.value(id) = 0);
}
inline void endFill()
EIGEN_DEPRECATED void endFill() { finalize(); }
/** Must be called after inserting a set of non zero entries.
*/
inline void finalize()
{
int size = m_data.size();
int i = m_outerSize;
@@ -233,7 +335,7 @@ class SparseMatrix
++i;
}
}
void prune(Scalar reference, RealScalar epsilon = precision<RealScalar>())
{
int k = 0;
@@ -390,11 +492,11 @@ class SparseMatrix
s << std::endl;
s << std::endl;
s << "Column pointers:\n";
for (int i=0; i<m.cols(); ++i)
for (int i=0; i<m.outerSize(); ++i)
{
s << m.m_outerIndex[i] << " ";
}
s << std::endl;
s << " $" << std::endl;
s << std::endl;
);
s << static_cast<const SparseMatrixBase<SparseMatrix>&>(m);

Some files were not shown because too many files have changed in this diff Show More